Repository: reef Updated Branches: refs/heads/master 729128ef9 -> 35b48e1ea
[REEF-1625] Fix TestFailMapperEvaluatorsOnDispose failures in AppVeyor This change reduces the number of failing evaluators in the test and fine-tunes the checks to account for possible retries. See JIRA for explanation of time-sensitivity of this test. JIRA: [REEF-1625](https://issues.apache.org/jira/browse/REEF-1625) Pull request: This closes #1189 Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/35b48e1e Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/35b48e1e Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/35b48e1e Branch: refs/heads/master Commit: 35b48e1eab365ff609a49ee97b0143805d7b61f3 Parents: 729128e Author: Mariia Mykhailova <[email protected]> Authored: Tue Nov 22 17:28:25 2016 -0800 Committer: Julia Wang <[email protected]> Committed: Wed Nov 23 12:18:46 2016 -0800 ---------------------------------------------------------------------- .../IMRU/TestFailMapperEvaluatorsOnDispose.cs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/35b48e1e/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs index 017580e..5a09739 100644 --- a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs +++ b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluatorsOnDispose.cs @@ -54,18 +54,23 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU NumberOfRetry, testFolder); string[] lines = ReadLogFile(DriverStdout, "driver", testFolder, 360); - var completedTaskCount = GetMessageCount(lines, "Received ICompletedTask"); var failedEvaluatorCount = GetMessageCount(lines, FailedEvaluatorMessage); var failedTaskCount = GetMessageCount(lines, FailedTaskMessage); var jobSuccess = GetMessageCount(lines, IMRUDriver<int[], int[], int[], int[]>.DoneActionPrefix); - // In first retry, all tasks are completed and then there are 2 failed evaluators. - // No failed tasks. - Assert.Equal(2, failedEvaluatorCount); + // In this test one of evaluators fails at task dispose stage. Depending on the timing of the failure, + // if it happens after all tasks completed, the job succeeds immediately, + // but if it happens before that, this counts as failure and job restarts. + // Number of tries done can be detected as number of recoveries done + 1 + var triesDone = GetMessageCount(lines, "Start recovery") + 1; + + // There should be no failed tasks. + // Number of failed evaluators = number of tries done + // Can't say anything about the number of completed tasks (depends on timing) + Assert.Equal(triesDone, failedEvaluatorCount); Assert.Equal(0, failedTaskCount); - Assert.Equal(numTasks, completedTaskCount); - // eventually job succeeds + // but eventually job must succeed Assert.Equal(1, jobSuccess); CleanUp(testFolder); } @@ -79,7 +84,6 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU return TangFactory.GetTang().NewConfigurationBuilder(c) .BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-2-") - .BindSetEntry<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail, string>(GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.TaskIdsToFail>.Class, "IMRUMap-RandomInputPartition-3-") .BindIntNamedParam<PipelinedBroadcastAndReduceWithFaultTolerant.FailureType>(PipelinedBroadcastAndReduceWithFaultTolerant.FailureType.EvaluatorFailureDuringTaskDispose.ToString()) .BindNamedParameter(typeof(MaxRetryNumberInRecovery), NumberOfRetry.ToString()) .BindNamedParameter(typeof(PipelinedBroadcastAndReduceWithFaultTolerant.TotalNumberOfForcedFailures), NumberOfRetry.ToString())
