http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 16aa452..fc6edb4 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -38,9 +38,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 11 12 -PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds +PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds +POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage @@ -112,7 +112,7 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds +PREHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -121,7 +121,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: database:default PREHOOK: Output: default@srcpart_date -POSTHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds +POSTHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -130,6 +130,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_date +POSTHOOK: Lineage: srcpart_date.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_date.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] PREHOOK: query: create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@srcpart @@ -148,7 +150,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_hour -PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr +POSTHOOK: Lineage: srcpart_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -157,7 +161,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: database:default PREHOOK: Output: default@srcpart_date_hour -POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr +POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -166,6 +170,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_date_hour +POSTHOOK: Lineage: srcpart_date_hour.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_date_hour.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_date_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_date_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] PREHOOK: query: create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@srcpart @@ -184,11 +192,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_double_hour +POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] PREHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -204,10 +214,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -254,10 +264,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -297,8 +307,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -307,7 +317,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -316,7 +326,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -326,9 +336,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 1000 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -361,10 +371,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -404,8 +414,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -414,7 +424,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -423,7 +433,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -436,23 +446,19 @@ POSTHOOK: Input: default@srcpart_date PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: -- multiple sources, single key EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple sources, single key EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -468,10 +474,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -495,10 +501,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -546,10 +552,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -564,10 +570,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -622,8 +628,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -633,7 +639,7 @@ STAGE PLANS: ListSink PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -644,7 +650,7 @@ PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_hour #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -656,10 +662,10 @@ POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -694,10 +700,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -712,10 +718,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -770,8 +776,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -781,7 +787,7 @@ STAGE PLANS: ListSink PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -792,7 +798,7 @@ PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_hour #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -806,19 +812,17 @@ POSTHOOK: Input: default@srcpart_hour PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -834,10 +838,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) @@ -861,10 +865,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) @@ -910,10 +914,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) @@ -953,8 +957,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -963,7 +967,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -972,7 +976,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -982,9 +986,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1017,10 +1021,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour - filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) @@ -1060,8 +1064,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1070,7 +1074,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1079,7 +1083,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1092,19 +1096,17 @@ POSTHOOK: Input: default@srcpart_date_hour PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -1120,10 +1122,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -1170,10 +1172,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -1213,8 +1215,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1223,7 +1225,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1232,7 +1234,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1242,9 +1244,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date #### A masked pattern was here #### 0 -PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1277,10 +1279,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean) + predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -1320,8 +1322,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1330,7 +1332,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1339,7 +1341,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST' +POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -1378,10 +1380,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1428,10 +1430,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1471,8 +1473,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1518,10 +1520,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1560,18 +1562,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + key expressions: (UDFToDouble(_col0) * 2.0) (type: double) sort order: + - Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1588,7 +1590,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + 0 (UDFToDouble(_col0) * 2.0) (type: double) 1 _col0 (type: double) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1611,8 +1613,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1675,10 +1677,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1718,8 +1720,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1774,18 +1776,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + key expressions: (UDFToDouble(_col0) * 2.0) (type: double) sort order: + - Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1802,7 +1804,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double) + 0 (UDFToDouble(_col0) * 2.0) (type: double) 1 _col0 (type: double) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1825,8 +1827,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -1857,14 +1859,10 @@ POSTHOOK: Input: default@srcpart_double_hour PREHOOK: query: select count(*) from srcpart where hr = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where hr = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11 @@ -1885,10 +1883,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1927,18 +1925,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string) + key expressions: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) sort order: + - Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string) + Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: srcpart_double_hour - filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1955,7 +1953,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string) + 0 UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) 1 UDFToString(_col0) (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1978,8 +1976,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2010,99 +2008,43 @@ POSTHOOK: Input: default@srcpart_double_hour PREHOOK: query: select count(*) from srcpart where cast(hr as string) = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where cast(hr as string) = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark - Edges: - Reducer 7 <- Map 6 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ds (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -2110,18 +2052,16 @@ STAGE PLANS: filterExpr: (ds = '2008-04-08') (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + keys: '2008-04-08' (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: '2008-04-08' (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2129,9 +2069,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2152,21 +2092,21 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: '2008-04-08' (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2174,42 +2114,35 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY POSTHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2262,12 +2195,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 = _col2) or (_col1 = _col4)) (type: boolean) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2288,8 +2221,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2298,8 +2231,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2308,7 +2241,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2319,10 +2252,10 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 1500 PREHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY POSTHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2457,8 +2390,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2467,7 +2400,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2476,7 +2409,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2487,10 +2420,10 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2599,8 +2532,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2609,9 +2542,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2719,8 +2652,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2730,10 +2663,10 @@ STAGE PLANS: ListSink PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2841,8 +2774,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2853,11 +2786,11 @@ STAGE PLANS: PREHOOK: query: -- with static pruning EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY POSTHOOK: query: -- with static pruning EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2873,10 +2806,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -2911,22 +2844,23 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -2947,11 +2881,13 @@ STAGE PLANS: predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2961,11 +2897,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -2996,8 +2933,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -3007,7 +2944,7 @@ STAGE PLANS: ListSink PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -3016,7 +2953,7 @@ PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_hour #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -3026,10 +2963,10 @@ POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3048,28 +2985,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator -
<TRUNCATED>