HIVE-12364 : Distcp job fails when run under Tez (Prasanth J via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16521c40 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16521c40 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16521c40 Branch: refs/heads/spark Commit: 16521c40055afa86a242dad1a5ce708d2aa9b631 Parents: d5a69ec Author: Prasanth Jayachandran <[email protected]> Authored: Mon Nov 9 17:59:37 2015 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Mon Nov 9 17:59:37 2015 -0800 ---------------------------------------------------------------------- itests/qtest/pom.xml | 6 ++++++ .../src/test/resources/testconfiguration.properties | 1 + .../test/queries/clientpositive/insert_dir_distcp.q | 9 +++++++++ .../results/clientpositive/insert_dir_distcp.q.out | 14 ++++++++++++++ .../clientpositive/tez/insert_dir_distcp.q.out | 14 ++++++++++++++ .../org/apache/hadoop/hive/shims/Hadoop23Shims.java | 3 +++ 6 files changed, 47 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/itests/qtest/pom.xml ---------------------------------------------------------------------- diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml index 65c3c75..cfa49ba 100644 --- a/itests/qtest/pom.xml +++ b/itests/qtest/pom.xml @@ -145,6 +145,12 @@ </dependency> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-distcp</artifactId> + <version>${hadoop.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>${hadoop.version}</version> <classifier>tests</classifier> http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index d16c318..70f96da 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -26,6 +26,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\ infer_bucket_sort_num_buckets.q,\ infer_bucket_sort_reducers_power_two.q,\ input16_cc.q,\ + insert_dir_distcp.q,\ join1.q,\ leftsemijoin_mr.q,\ list_bucket_dml_10.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/ql/src/test/queries/clientpositive/insert_dir_distcp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/insert_dir_distcp.q b/ql/src/test/queries/clientpositive/insert_dir_distcp.q new file mode 100644 index 0000000..6582938 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_dir_distcp.q @@ -0,0 +1,9 @@ +set hive.exec.copyfile.maxsize=400; + +set tez.am.log.level=INFO; +set tez.task.log.level=INFO; +-- see TEZ-2931 for using INFO logging + +insert overwrite directory '/tmp/src' select * from src; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src/; http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/ql/src/test/results/clientpositive/insert_dir_distcp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out new file mode 100644 index 0000000..b70fa01 --- /dev/null +++ b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out @@ -0,0 +1,14 @@ +PREHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +Found 1 items +#### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out new file mode 100644 index 0000000..b70fa01 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out @@ -0,0 +1,14 @@ +PREHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +Found 1 items +#### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/16521c40/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 36282a5..4da98e4 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -1300,11 +1300,14 @@ public class Hadoop23Shims extends HadoopShimsSecure { options.setSkipCRC(true); options.preserve(FileAttribute.BLOCKSIZE); try { + conf.setBoolean("mapred.mapper.new-api", true); DistCp distcp = new DistCp(conf, options); distcp.execute(); return true; } catch (Exception e) { throw new IOException("Cannot execute DistCp process: " + e, e); + } finally { + conf.setBoolean("mapred.mapper.new-api", false); } }
