MAPREDUCE-6357. MultipleOutputs.write() API should document that output committing is not utilized when input path is absolute. Contributed by Dustin Cote.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2ba90c93
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2ba90c93
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2ba90c93
Branch: refs/heads/HDFS-7240
Commit: 2ba90c93d71aa2d30ee9ed431750c10c685e5599
Parents: 3347493
Author: Akira Ajisaka <aajis...@apache.org>
Authored: Fri Aug 21 10:41:54 2015 +0900
Committer: Akira Ajisaka <aajis...@apache.org>
Committed: Fri Aug 21 10:41:54 2015 +0900
----------------------------------------------------------------------
 hadoop-mapreduce-project/CHANGES.txt | 4 ++++
 .../hadoop/mapreduce/lib/output/MultipleOutputs.java | 14 +++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ba90c93/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 43d0faf..361a19b 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -555,6 +555,10 @@ Release 2.8.0 - UNRELEASED
 MAPREDUCE-5817. Mappers get rescheduled on node transition even after all reducers are completed. (Sangjin Lee via kasha)
+ MAPREDUCE-6357. MultipleOutputs.write() API should document that output
+ committing is not utilized when input path is absolute.
+ (Dustin Cote via aajisaka)
+
 Release 2.7.2 - UNRELEASED
 INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ba90c93/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java
index c31cab7..a3a0e76 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java
@@ -120,7 +120,11 @@ import java.util.*;
 *
 * <p>
 * Use <code>MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath)</code> to write key and
- * value to a path specified by <code>baseOutputPath</code>, with no need to specify a named output:
+ * value to a path specified by <code>baseOutputPath</code>, with no need to specify a named output.
+ * <b>Warning</b>: when the baseOutputPath passed to MultipleOutputs.write
+ * is a path that resolves outside of the final job output directory, the
+ * directory is created immediately and then persists through subsequent
+ * task retries, breaking the concept of output committing:
 * </p>
 *
 * <pre>
@@ -418,6 +422,10 @@ public class MultipleOutputs<KEYOUT, VALUEOUT> {
 * @param value the value
 * @param baseOutputPath base-output path to write the record to.
 * Note: Framework will generate unique filename for the baseOutputPath
+ * <b>Warning</b>: when the baseOutputPath is a path that resolves
+ * outside of the final job output directory, the directory is created
+ * immediately and then persists through subsequent task retries, breaking
+ * the concept of output committing.
 */
 @SuppressWarnings("unchecked")
 public <K, V> void write(String namedOutput, K key, V value,
@@ -442,6 +450,10 @@ public class MultipleOutputs<KEYOUT, VALUEOUT> {
 * @param value the value
 * @param baseOutputPath base-output path to write the record to.
 * Note: Framework will generate unique filename for the baseOutputPath
+ * <b>Warning</b>: when the baseOutputPath is a path that resolves
+ * outside of the final job output directory, the directory is created
+ * immediately and then persists through subsequent task retries, breaking
+ * the concept of output committing.
 */
 @SuppressWarnings("unchecked")
 public void write(KEYOUT key, VALUEOUT value, String baseOutputPath)