[34/49] hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)

2015-03-12 Thread zjshen
MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. 
(Siqi Li via gera)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aa92b764
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aa92b764
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aa92b764

Branch: refs/heads/YARN-2928
Commit: aa92b764a7ddb888d097121c4d610089a0053d11
Parents: a380643
Author: Gera Shegalov g...@apache.org
Authored: Tue Mar 10 11:12:48 2015 -0700
Committer: Gera Shegalov g...@apache.org
Committed: Tue Mar 10 11:32:08 2015 -0700

--
 hadoop-mapreduce-project/CHANGES.txt|   3 +
 .../lib/output/FileOutputCommitter.java | 119 ++--
 .../src/main/resources/mapred-default.xml   |  54 
 .../hadoop/mapred/TestFileOutputCommitter.java  | 134 +++
 .../lib/output/TestFileOutputCommitter.java | 116 ++--
 5 files changed, 349 insertions(+), 77 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/CHANGES.txt
--
diff --git a/hadoop-mapreduce-project/CHANGES.txt 
b/hadoop-mapreduce-project/CHANGES.txt
index eecf022..0bbe85c 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED
 
 MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw)
 
+MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output
+files. (Siqi Li via gera)
+
   BUG FIXES
 
 MAPREDUCE-6210. Use getApplicationAttemptId() instead of getApplicationId()

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
--
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
index 55252f0..28a8548 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.mapreduce.lib.output;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 
 import org.apache.commons.logging.Log;
@@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Deprecated
   protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME;
   public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
-  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = 
-"mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
+  "mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION =
+  "mapreduce.fileoutputcommitter.algorithm.version";
+  public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1;
   private Path outputPath = null;
   private Path workPath = null;
+  private final int algorithmVersion;
 
   /**
* Create a file output committer
@@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Private
   public FileOutputCommitter(Path outputPath, 
  JobContext context) throws IOException {
+Configuration conf = context.getConfiguration();
+algorithmVersion =
+conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
+FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT);
+LOG.info("File Output Committer Algorithm version is " + algorithmVersion);
+if (algorithmVersion != 1 && algorithmVersion != 2) {
+  throw new IOException("Only 1 or 2 algorithm version is supported");
+}
 if (outputPath != null) {
   FileSystem fs = outputPath.getFileSystem(context.getConfiguration());
 

hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)

2015-03-10 Thread gera
Repository: hadoop
Updated Branches:
  refs/heads/trunk a380643d2 -> aa92b764a


MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. 
(Siqi Li via gera)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/aa92b764
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/aa92b764
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/aa92b764

Branch: refs/heads/trunk
Commit: aa92b764a7ddb888d097121c4d610089a0053d11
Parents: a380643
Author: Gera Shegalov g...@apache.org
Authored: Tue Mar 10 11:12:48 2015 -0700
Committer: Gera Shegalov g...@apache.org
Committed: Tue Mar 10 11:32:08 2015 -0700

--
 hadoop-mapreduce-project/CHANGES.txt|   3 +
 .../lib/output/FileOutputCommitter.java | 119 ++--
 .../src/main/resources/mapred-default.xml   |  54 
 .../hadoop/mapred/TestFileOutputCommitter.java  | 134 +++
 .../lib/output/TestFileOutputCommitter.java | 116 ++--
 5 files changed, 349 insertions(+), 77 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/CHANGES.txt
--
diff --git a/hadoop-mapreduce-project/CHANGES.txt 
b/hadoop-mapreduce-project/CHANGES.txt
index eecf022..0bbe85c 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED
 
 MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw)
 
+MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output
+files. (Siqi Li via gera)
+
   BUG FIXES
 
 MAPREDUCE-6210. Use getApplicationAttemptId() instead of getApplicationId()

http://git-wip-us.apache.org/repos/asf/hadoop/blob/aa92b764/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
--
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
index 55252f0..28a8548 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.mapreduce.lib.output;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 
 import org.apache.commons.logging.Log;
@@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Deprecated
   protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME;
   public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
-  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = 
-"mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
+  "mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION =
+  "mapreduce.fileoutputcommitter.algorithm.version";
+  public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1;
   private Path outputPath = null;
   private Path workPath = null;
+  private final int algorithmVersion;
 
   /**
* Create a file output committer
@@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Private
   public FileOutputCommitter(Path outputPath, 
  JobContext context) throws IOException {
+Configuration conf = context.getConfiguration();
+algorithmVersion =
+conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
+FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT);
+LOG.info("File Output Committer Algorithm version is " + algorithmVersion);
+if (algorithmVersion != 1 && algorithmVersion != 2) {
+  throw new IOException("Only 1 or 2 algorithm version is supported");
+}
 if (outputPath != null) {
  

hadoop git commit: MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. (Siqi Li via gera)

2015-03-10 Thread gera
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 2690c7252 -> 6802e8fef


MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output files. 
(Siqi Li via gera)

(cherry picked from commit aa92b764a7ddb888d097121c4d610089a0053d11)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6802e8fe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6802e8fe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6802e8fe

Branch: refs/heads/branch-2
Commit: 6802e8fefc9af806f77e36426a145fd93ba9f009
Parents: 2690c72
Author: Gera Shegalov g...@apache.org
Authored: Tue Mar 10 11:12:48 2015 -0700
Committer: Gera Shegalov g...@apache.org
Committed: Tue Mar 10 11:40:45 2015 -0700

--
 hadoop-mapreduce-project/CHANGES.txt|   3 +
 .../lib/output/FileOutputCommitter.java | 119 ++--
 .../src/main/resources/mapred-default.xml   |  54 
 .../hadoop/mapred/TestFileOutputCommitter.java  | 134 +++
 .../lib/output/TestFileOutputCommitter.java | 116 ++--
 5 files changed, 349 insertions(+), 77 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6802e8fe/hadoop-mapreduce-project/CHANGES.txt
--
diff --git a/hadoop-mapreduce-project/CHANGES.txt 
b/hadoop-mapreduce-project/CHANGES.txt
index 5efcd32..6b57ddd 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -103,6 +103,9 @@ Release 2.7.0 - UNRELEASED
 
 MAPREDUCE-6059. Speed up history server startup time (Siqi Li via aw)
 
+MAPREDUCE-4815. Speed up FileOutputCommitter#commitJob for many output
+files. (Siqi Li via gera)
+
   BUG FIXES
 
 MAPREDUCE-6210. Use getApplicationAttemptId() instead of getApplicationId()

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6802e8fe/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
--
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
index 55252f0..28a8548 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.mapreduce.lib.output;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 
 import org.apache.commons.logging.Log;
@@ -25,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -57,10 +59,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Deprecated
   protected static final String TEMP_DIR_NAME = PENDING_DIR_NAME;
   public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
-  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = 
-"mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
+  "mapreduce.fileoutputcommitter.marksuccessfuljobs";
+  public static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION =
+  "mapreduce.fileoutputcommitter.algorithm.version";
+  public static final int FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT = 1;
   private Path outputPath = null;
   private Path workPath = null;
+  private final int algorithmVersion;
 
   /**
* Create a file output committer
@@ -87,6 +93,14 @@ public class FileOutputCommitter extends OutputCommitter {
   @Private
   public FileOutputCommitter(Path outputPath, 
  JobContext context) throws IOException {
+Configuration conf = context.getConfiguration();
+algorithmVersion =
+conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
+FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT);
+LOG.info("File Output Committer Algorithm version is " + algorithmVersion);
+if (algorithmVersion != 1 && algorithmVersion != 2) {
+  throw new IOException(Only 1