[34/49] hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)
MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aa92b764 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aa92b764 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aa92b764 Branch: refs/heads/YARN-2928 Commit: aa92b764a7ddb888d097121c4d610089a0053d11 Parents: a380643 Author: Gera Shegalov g...@apache.org Authored: Tue Mar 10 11:12:48 2015 -0700 Committer: Gera Shegalov g...@apache.org Committed: Tue Mar 10 11:32:08 2015 -0700 -- hadoop-mapreduce-project/CHANGES.txt| 3 + .../lib/output/FileOutputCommitter.java | 119 ++-- .../src/main/resources/mapred-default.xml | 54 .../hadoop/mapred/TestFileOutputCommitter.java | 134 +++ .../lib/output/TestFileOutputCommitter.java | 116 ++-- 5 files changed, 349 insertions(+), 77 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/CHANGES.txt -- diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index eecf022..0bbe85c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw) +MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output +files. (Siqi Li via gera) + BUG FIXES MAPREDUCE-6210. 
Use getApplicationAttemptId() instead of getApplicationId() http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java index 55252f0..28a8548 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.lib.output; +import java.io.FileNotFoundException; import java.io.IOException; import org.apache.commons.logging.Log; @@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter { @Deprecated protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME; public static final String SUCCEEDED_FILE_NAME = _SUCCESS; - public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = -mapreduce.fileoutputcommitter.marksuccessfuljobs; + public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = + mapreduce.fileoutputcommitter.marksuccessfuljobs; + public 
static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION = + "mapreduce.fileoutputcommitter.algorithm.version"; + public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1; private Path outputPath = null; private Path workPath = null; + private final int algorithmVersion; /** * Create a file output committer @@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter { @Private public FileOutputCommitter(Path outputPath, JobContext context) throws IOException { +Configuration conf = context.getConfiguration(); +algorithmVersion = +conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, +FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT); +LOG.info("File Output Committer Algorithm version is " + algorithmVersion); +if (algorithmVersion != 1 && algorithmVersion != 2) { + throw new IOException("Only 1 or 2 algorithm version is supported"); +} if (outputPath != null) { FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)
Repository: hadoop Updated Branches: refs/heads/trunk a380643d2 - aa92b764a MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aa92b764 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aa92b764 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aa92b764 Branch: refs/heads/trunk Commit: aa92b764a7ddb888d097121c4d610089a0053d11 Parents: a380643 Author: Gera Shegalov g...@apache.org Authored: Tue Mar 10 11:12:48 2015 -0700 Committer: Gera Shegalov g...@apache.org Committed: Tue Mar 10 11:32:08 2015 -0700 -- hadoop-mapreduce-project/CHANGES.txt| 3 + .../lib/output/FileOutputCommitter.java | 119 ++-- .../src/main/resources/mapred-default.xml | 54 .../hadoop/mapred/TestFileOutputCommitter.java | 134 +++ .../lib/output/TestFileOutputCommitter.java | 116 ++-- 5 files changed, 349 insertions(+), 77 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/CHANGES.txt -- diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index eecf022..0bbe85c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw) +MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output +files. (Siqi Li via gera) + BUG FIXES MAPREDUCE-6210. 
Use getApplicationAttemptId() instead of getApplicationId() http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java index 55252f0..28a8548 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.lib.output; +import java.io.FileNotFoundException; import java.io.IOException; import org.apache.commons.logging.Log; @@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter { @Deprecated protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME; public static final String SUCCEEDED_FILE_NAME = _SUCCESS; - public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = -mapreduce.fileoutputcommitter.marksuccessfuljobs; + public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = + mapreduce.fileoutputcommitter.marksuccessfuljobs; + public 
static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION = + "mapreduce.fileoutputcommitter.algorithm.version"; + public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1; private Path outputPath = null; private Path workPath = null; + private final int algorithmVersion; /** * Create a file output committer @@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter { @Private public FileOutputCommitter(Path outputPath, JobContext context) throws IOException { +Configuration conf = context.getConfiguration(); +algorithmVersion = +conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, +FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT); +LOG.info("File Output Committer Algorithm version is " + algorithmVersion); +if (algorithmVersion != 1 && algorithmVersion != 2) { + throw new IOException("Only 1 or 2 algorithm version is supported"); +} if (outputPath != null) {
hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)
Repository: hadoop Updated Branches: refs/heads/branch-2 2690c7252 - 6802e8fef MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera) (cherry picked from commit aa92b764a7ddb888d097121c4d610089a0053d11) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6802e8fe Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6802e8fe Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6802e8fe Branch: refs/heads/branch-2 Commit: 6802e8fefc9af806f77e36426a145fd93ba9f009 Parents: 2690c72 Author: Gera Shegalov g...@apache.org Authored: Tue Mar 10 11:12:48 2015 -0700 Committer: Gera Shegalov g...@apache.org Committed: Tue Mar 10 11:40:45 2015 -0700 -- hadoop-mapreduce-project/CHANGES.txt| 3 + .../lib/output/FileOutputCommitter.java | 119 ++-- .../src/main/resources/mapred-default.xml | 54 .../hadoop/mapred/TestFileOutputCommitter.java | 134 +++ .../lib/output/TestFileOutputCommitter.java | 116 ++-- 5 files changed, 349 insertions(+), 77 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/6802e8fe/hadoop-mapreduce-project/CHANGES.txt -- diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 5efcd32..6b57ddd 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -103,6 +103,9 @@ Release 2.7.0 - UNRELEASED MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw) +MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output +files. (Siqi Li via gera) + BUG FIXES MAPREDUCE-6210. 
Use getApplicationAttemptId() instead of getApplicationId() http://git-wip-us.apache.org/repos/asf/hadoop/blob/6802e8fe/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java index 55252f0..28a8548 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.lib.output; +import java.io.FileNotFoundException; import java.io.IOException; import org.apache.commons.logging.Log; @@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter { @Deprecated protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME; public static final String SUCCEEDED_FILE_NAME = _SUCCESS; - public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = -mapreduce.fileoutputcommitter.marksuccessfuljobs; + public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = + mapreduce.fileoutputcommitter.marksuccessfuljobs; + public 
static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION = + "mapreduce.fileoutputcommitter.algorithm.version"; + public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1; private Path outputPath = null; private Path workPath = null; + private final int algorithmVersion; /** * Create a file output committer @@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter { @Private public FileOutputCommitter(Path outputPath, JobContext context) throws IOException { +Configuration conf = context.getConfiguration(); +algorithmVersion = +conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, +FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT); +LOG.info("File Output Committer Algorithm version is " + algorithmVersion); +if (algorithmVersion != 1 && algorithmVersion != 2) { + throw new IOException("Only 1