TEZ-3604. Remove the compositeInputAttemptIdentifier from remaining list upon fetch completion in the Ordered case (Kuhu Shukla via jeagles)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/29d6df2c
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/29d6df2c
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/29d6df2c

Branch: refs/heads/master
Commit: 29d6df2c241dab5748086d1536f3f0d41421cbe1
Parents: 0cf1ce2
Author: Jonathan Eagles <[email protected]>
Authored: Fri Feb 3 13:00:18 2017 -0600
Committer: Jonathan Eagles <[email protected]>
Committed: Fri Feb 3 13:00:18 2017 -0600

----------------------------------------------------------------------
 TEZ-3334-CHANGES.txt | 1 +
 .../common/shuffle/orderedgrouped/FetcherOrderedGrouped.java | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tez/blob/29d6df2c/TEZ-3334-CHANGES.txt
----------------------------------------------------------------------
diff --git a/TEZ-3334-CHANGES.txt b/TEZ-3334-CHANGES.txt
index 8416cc9..49da5aa 100644
--- a/TEZ-3334-CHANGES.txt
+++ b/TEZ-3334-CHANGES.txt
@@ -4,6 +4,7 @@ Apache Tez Change Log
 INCOMPATIBLE CHANGES:
 
 ALL CHANGES:
+  TEZ-3604. Remove the compositeInputAttemptIdentifier from remaining list upon fetch completion in the Ordered case
   TEZ-3599. Unordered Fetcher can hang if empty partitions are present
   TEZ-3596. Number of Empty DME logged for Composite fetch is too high
   TEZ-3597. Composite Fetch hangs on certain DME empty events.
http://git-wip-us.apache.org/repos/asf/tez/blob/29d6df2c/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
index f213268..6bdb453 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java
@@ -278,11 +278,16 @@ class FetcherOrderedGrouped extends CallableWithNdc<Void> {
       // yet_to_be_fetched list and marking the failed tasks.
       InputAttemptIdentifier[] failedTasks = null;
       while (!remaining.isEmpty() && failedTasks == null) {
+        String inputAttemptIdentifierId =
+            remaining.entrySet().iterator().next().getKey();
         // fail immediately after first failure because we dont know how much to
         // skip for this error in the input stream. So we cannot move on to the
         // remaining outputs. YARN-1773. Will get to them in the next retry.
         try {
           failedTasks = copyMapOutput(host, input);
+          if (failedTasks == null || failedTasks.length == 0) {
+            remaining.remove(inputAttemptIdentifierId);
+          }
         } catch (FetcherReadTimeoutException e) {
           // Setup connection again if disconnected
           cleanupCurrentConnection(true);
@@ -568,7 +573,6 @@ class FetcherOrderedGrouped extends CallableWithNdc<Void> {
         scheduler.copySucceeded(srcAttemptId, host, compressedLength, decompressedLength,
                                 endTime - startTime, mapOutput, false);
         // Note successful shuffle
-        remaining.remove(srcAttemptId.toString());
         metrics.successFetch();
       }
     } catch(IOException ioe) {
