Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,268 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + +PREHOOK: query: EXPLAIN +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: EXPLAIN +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + +PREHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src1_index__ +PREHOOK: query: CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src2_index__ +PREHOOK: query: ALTER INDEX src1_index ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src1_index__ +POSTHOOK: query: ALTER INDEX src1_index ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src1_index__ +POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX src2_index ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src2_index__ +POSTHOOK: query: ALTER INDEX src2_index ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src2_index__ +POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM default__src_src1_index__ +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src1_index__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM default__src_src1_index__ +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src1_index__ +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM default__src_src2_index__ +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM default__src_src2_index__ +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 4) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default__src_src1_index__ + Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key = 0) and _bucketname is not null) and _offset is not null) (type: boolean) + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array<bigint>) + Map 4 + Map Operator Tree: + TableScan + alias: default__src_src2_index__ + Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value = 'val_0') and _bucketname is not null) and _offset is not null) (type: boolean) + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array<bigint>) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: bigint) + 1 _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 69 Data size: 6418 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: collect_set(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: array<bigint>) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array<bigint>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +#### A masked pattern was here #### +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src1_index__ +PREHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src1_index__ +POSTHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: DROP INDEX src1_index ON src +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@src +POSTHOOK: query: DROP INDEX src1_index ON src +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@src +PREHOOK: query: DROP INDEX src2_index ON src +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@src +POSTHOOK: query: DROP INDEX src2_index ON src +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@src
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,287 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- try the query without indexing, with manual indexing, and with automatic indexing +-- without indexing +SELECT key, value FROM src WHERE key=0 AND value = "val_0" +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- try the query without indexing, with manual indexing, and with automatic indexing +-- without indexing +SELECT key, value FROM src WHERE key=0 AND value = "val_0" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: -- create indices +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- create indices +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + +PREHOOK: query: EXPLAIN +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: EXPLAIN +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + +PREHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src1_index__ +PREHOOK: query: CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src2_index__ +PREHOOK: query: ALTER INDEX src1_index ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src1_index__ +POSTHOOK: query: ALTER INDEX src1_index ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src1_index__ +POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX src2_index ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src2_index__ +POSTHOOK: query: ALTER INDEX src2_index ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src2_index__ +POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM default__src_src1_index__ +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src1_index__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM default__src_src1_index__ +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src1_index__ +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM default__src_src2_index__ +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM default__src_src2_index__ +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +PREHOOK: query: -- manual indexing +EXPLAIN +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +PREHOOK: type: QUERY +POSTHOOK: query: -- manual indexing +EXPLAIN +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 4) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default__src_src1_index__ + Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key = 0) and _bucketname is not null) and _offset is not null) (type: boolean) + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array<bigint>) + Map 4 + Map Operator Tree: + TableScan + alias: default__src_src2_index__ + Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value = 'val_0') and _bucketname is not null) and _offset is not null) (type: boolean) + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: array<bigint>) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: bigint) + 1 _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 69 Data size: 6418 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: collect_set(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: array<bigint>) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: collect_set(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array<bigint>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +#### A masked pattern was here #### +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src1_index__ +PREHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src1_index__ +POSTHOOK: Input: default@default__src_src2_index__ +#### A masked pattern was here #### +PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: DROP INDEX src1_index ON src +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@src +POSTHOOK: query: DROP INDEX src1_index ON src +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@src +PREHOOK: query: DROP INDEX src2_index ON src +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@src +POSTHOOK: query: DROP INDEX src2_index ON src +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@src Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,123 @@ +PREHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table_bucketed +POSTHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table_bucketed +PREHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_bucketed +POSTHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_bucketed +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_bucketed +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 3 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +31 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +179 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +99 +PREHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +31 +179 +99 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,592 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- that operators in the mapper have no effect + +CREATE TABLE test_table1 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- that operators in the mapper have no effect + +CREATE TABLE test_table1 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table_out +POSTHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table_out +PREHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test map group by doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_table1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT key, count(*) FROM test_table1 GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_out +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_out +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, count(*) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +PREHOOK: type: QUERY +POSTHOOK: query: -- Test map group by doesn't affect inference, should be bucketed and sorted by value +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, count(*) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_table1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(_col1) is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(value) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col1) (type: double) + 1 UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT a.key, a.value FROM ( + SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key +) a JOIN ( + SELECT key, value FROM src +) b +ON (a.value = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)test_table1.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_out +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_out +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test SMB join doesn't affect inference, should not be bucketed or sorted +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_out +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_out +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- Test SMB join doesn't affect inference, should be bucketed and sorted by key +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-0 + Move Operator + tables: + partition: + part 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table_out@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +GROUP BY b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table_out@part=1 +POSTHOOK: Lineage: test_table_out PARTITION(part=1).key SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table_out PARTITION(part=1).value EXPRESSION [(test_table1)a.null, (test_table2)b.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_out +POSTHOOK: query: DESCRIBE FORMATTED test_table_out PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_out +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_out +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 4 + numRows 309 + rawDataSize 2728 + totalSize 3037 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,126 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where where merging may or may not be used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where where merging may or may not be used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +POSTHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted. +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 1028 + rawDataSize 10968 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out?rev=1656573&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out Mon Feb 2 21:10:08 2015 @@ -0,0 +1,218 @@ +PREHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table +POSTHOOK: query: CREATE TABLE test_table (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write +-- all partitions. The subquery produces rows as follows +-- key = 0: +-- 0, <value>, 0 +-- key = 1: +-- 0, <value>, 1 +-- key = 2: +-- 1, <value>, 0 +-- This means that by distributing by the first column into two reducers, and using the third +-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers +-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key +-- and hr=1 should not. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests dynamic partitions where bucketing/sorting can be inferred, but not all reducers write +-- all partitions. The subquery produces rows as follows +-- key = 0: +-- 0, <value>, 0 +-- key = 1: +-- 0, <value>, 1 +-- key = 2: +-- 1, <value>, 0 +-- This means that by distributing by the first column into two reducers, and using the third +-- columns as a dynamic partition, the dynamic partition for 0 will get written in both reducers +-- and the partition for 1 will get written in one reducer. So hr=0 should be bucketed by key +-- and hr=1 should not. + +EXPLAIN +INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: if(((key % 3) < 2), 0, 1) (type: int), value (type: string), UDFToInteger((key % 2)) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@test_table@ds=2008-04-08 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) +SELECT key2, value, cast(hr as int) FROM +(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr +FROM srcpart +WHERE ds = '2008-04-08') a +DISTRIBUTE BY key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=0 +POSTHOOK: Output: default@test_table@ds=2008-04-08/hr=1 +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 0] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 670 + rawDataSize 5888 + totalSize 6558 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (ds='2008-04-08', hr='1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 330 + rawDataSize 2924 + totalSize 3254 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1
