Repository: hive Updated Branches: refs/heads/master 881e29124 -> a43581b6d
Revert "HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Rui Li)" This reverts commit 8d084d676539b6ba3b9fd46e86505cca4be95b43. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a43581b6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a43581b6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a43581b6 Branch: refs/heads/master Commit: a43581b6d884a7041930644fb3a580e398bc6951 Parents: 881e291 Author: Vineet Garg <vg...@apache.org> Authored: Wed Dec 12 16:37:37 2018 -0800 Committer: Vineet Garg <vg...@apache.org> Committed: Wed Dec 12 16:37:37 2018 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 - .../ReduceSinkDeDuplicationUtils.java | 32 +-- .../queries/clientpositive/reducesink_dedup.q | 12 +- .../clientpositive/llap/reducesink_dedup.q.out | 229 ------------------- .../clientpositive/reducesink_dedup.q.out | 202 +--------------- 5 files changed, 14 insertions(+), 462 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index e32fc60..01cad2a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -634,7 +634,6 @@ minillaplocal.query.files=\ ptf_streaming.q,\ runtime_stats_merge.q,\ quotedid_smb.q,\ - reducesink_dedup.q,\ resourceplan.q,\ results_cache_1.q,\ results_cache_2.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java index 23ec1eb..7ccd4a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java @@ -475,25 +475,6 @@ public class ReduceSinkDeDuplicationUtils { return 0; } - // Check that in the path between cRS and pRS, there are only Select operators - // i.e. the sequence must be pRS-SEL*-cRS - // ensure SEL does not branch - protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) { - Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); - while (parent != pRS) { - assert parent.getNumParent() == 1; - if (!(parent instanceof SelectOperator)) { - return false; - } - if (parent.getChildOperators().size() > 1) { - return false; - } - - parent = parent.getParentOperators().get(0); - } - return true; - } - protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException { assert cRS.getNumParent() == 1; @@ -503,8 +484,15 @@ public class ReduceSinkDeDuplicationUtils { List<ExprNodeDesc> cKeys = cConf.getKeyCols(); List<ExprNodeDesc> pKeys = pConf.getKeyCols(); - if (!checkSelectSingleBranchOnly(cRS, pRS)) { - return false; + // Check that in the path between cRS and pRS, there are only Select operators + // i.e. the sequence must be pRS-SEL*-cRS + Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); + while (parent != pRS) { + assert parent.getNumParent() == 1; + if (!(parent instanceof SelectOperator)) { + return false; + } + parent = parent.getParentOperators().get(0); } // If child keys are null or empty, we bail out @@ -576,7 +564,7 @@ public class ReduceSinkDeDuplicationUtils { // Replace pRS with cRS and remove operator sequence from pRS to cRS // Recall that the sequence must be pRS-SEL*-cRS - Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0); + parent = cRS.getParentOperators().get(0); while (parent != pRS) { dedupCtx.addRemovedOperator(parent); parent = parent.getParentOperators().get(0); http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/queries/clientpositive/reducesink_dedup.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q index b7f9a01..352a558 100644 --- a/ql/src/test/queries/clientpositive/reducesink_dedup.q +++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q @@ -1,13 +1,5 @@ --! qt:dataset:part ---! qt:dataset:src -select p_name +select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 -; - -create temporary table d1 (key int); -create temporary table d2 (key int); - -explain from (select key from src cluster by key) a - insert overwrite table d1 select a.key - insert overwrite table d2 select a.key cluster by a.key; \ No newline at end of file +; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out deleted file mode 100644 index 6dede4c..0000000 --- a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out +++ /dev/null @@ -1,229 +0,0 @@ -PREHOOK: query: select p_name -from (select p_name from part distribute by 1 sort by 1) p -distribute by 1 sort by 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_name -from (select p_name from part distribute by 1 sort by 1) p -distribute by 1 sort by 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -almond antique burnished rose metallic -almond antique burnished rose metallic -almond antique chartreuse lavender yellow -almond antique salmon chartreuse burlywood -almond aquamarine burnished black steel -almond aquamarine pink moccasin thistle -almond antique violet chocolate turquoise -almond antique violet turquoise frosted -almond aquamarine midnight light salmon -almond aquamarine rose maroon antique -almond aquamarine sandy cyan gainsboro -almond antique chartreuse khaki white -almond antique forest lavender goldenrod -almond antique metallic orange dim -almond antique misty red olive -almond antique olive coral navajo -almond antique gainsboro frosted violet -almond antique violet mint lemon -almond aquamarine floral ivory bisque -almond aquamarine yellow dodger mint -almond azure aquamarine papaya violet -almond antique blue firebrick mint -almond antique medium spring khaki -almond antique sky peru orange -almond aquamarine dodger light gainsboro -almond azure blanched chiffon midnight -PREHOOK: query: create temporary table d1 (key int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@d1 -POSTHOOK: query: create temporary table d1 (key int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@d1 -PREHOOK: query: create temporary table d2 (key int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@d2 -POSTHOOK: query: create temporary table d2 (key int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@d2 -PREHOOK: query: explain from (select key from src cluster by key) a - insert overwrite table d1 select a.key - insert overwrite table d2 select a.key cluster by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@d1 -PREHOOK: Output: default@d2 -POSTHOOK: query: explain from (select key from src cluster by key) a - insert overwrite table d1 select a.key - insert overwrite table d2 select a.key cluster by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@d1 -POSTHOOK: Output: default@d2 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d2 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d1 - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.d1 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d2 - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.d2 - http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/results/clientpositive/reducesink_dedup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/reducesink_dedup.q.out index 2b068ac..b89df52 100644 --- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out +++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: select p_name +PREHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_name +POSTHOOK: query: select p_name from (select p_name from part distribute by 1 sort by 1) p distribute by 1 sort by 1 POSTHOOK: type: QUERY @@ -36,201 +36,3 @@ almond antique medium spring khaki almond antique sky peru orange almond aquamarine dodger light gainsboro almond azure blanched chiffon midnight -PREHOOK: query: create temporary table d1 (key int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@d1 -POSTHOOK: query: create temporary table d1 (key int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@d1 -PREHOOK: query: create temporary table d2 (key int) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@d2 -POSTHOOK: query: create temporary table d2 (key int) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@d2 -PREHOOK: query: explain from (select key from src cluster by key) a - insert overwrite table d1 select a.key - insert overwrite table d2 select a.key cluster by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@d1 -PREHOOK: Output: default@d2 -POSTHOOK: query: explain from (select key from src cluster by key) a - insert overwrite table d1 select a.key - insert overwrite table d2 select a.key cluster by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@d1 -POSTHOOK: Output: default@d2 -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 - Stage-4 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 - Stage-1 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Select Operator - expressions: UDFToInteger(_col0) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d2 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: compute_stats(key, 'hll') - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.d1 - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key - Column Types: int - Table: default.d2 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.d2 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -