Repository: reef Updated Branches: refs/heads/master 0d78bf822 -> 977965c23
[REEF-1682] Update TCP connection config values for IMRU example and test Recent stress testing for IMRU FT shows that when there are hundreds of nodes with many failed and re-requested evaluators, the default TCP connection retry time is not long enough for evaluators to connect to the driver, as the driver might be busy handling a long event queue and each event handler is locked in the IMRU driver. We need to set proper configuration values for the IMRU example, which is used for running stress testing. In some of the IMRU tests this configuration is set via task function config, which has no effect. It should be set through the configuration provider to take effect. This change sets TCP connection configuration values for the IMRU example and fixes this setting in functional tests. JIRA: [REEF-1682](https://issues.apache.org/jira/browse/REEF-1682) Pull request: This closes #1197 Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/977965c2 Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/977965c2 Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/977965c2 Branch: refs/heads/master Commit: 977965c239ec59888c0dae5309d598f00848824c Parents: 0d78bf8 Author: Julia Wang <[email protected]> Authored: Thu Dec 1 16:27:59 2016 -0800 Committer: Mariia Mykhailova <[email protected]> Committed: Mon Dec 5 13:30:34 2016 -0800 ---------------------------------------------------------------------- .../OnREEFIMRURunTimeConfiguration.cs | 10 +++--- .../Functional/IMRU/IMRUCloseTaskTest.cs | 32 -------------------- .../Functional/IMRU/TestFailMapperEvaluators.cs | 19 ++---------- .../Functional/ReefFunctionalTest.cs | 14 +++++++-- 4 files changed, 19 insertions(+), 56 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/977965c2/lang/cs/Org.Apache.REEF.IMRU.Examples/OnREEFIMRURunTimeConfiguration.cs 
---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.IMRU.Examples/OnREEFIMRURunTimeConfiguration.cs b/lang/cs/Org.Apache.REEF.IMRU.Examples/OnREEFIMRURunTimeConfiguration.cs index 31585da..854e5ad 100644 --- a/lang/cs/Org.Apache.REEF.IMRU.Examples/OnREEFIMRURunTimeConfiguration.cs +++ b/lang/cs/Org.Apache.REEF.IMRU.Examples/OnREEFIMRURunTimeConfiguration.cs @@ -59,7 +59,7 @@ namespace Org.Apache.REEF.IMRU.Examples .Build(); } - return Configurations.Merge(runtimeConfig, imruClientConfig, GetTcpConfiguration()); + return Configurations.Merge(runtimeConfig, imruClientConfig, GetTcpConnectionConfiguration()); } /// <summary> @@ -74,14 +74,14 @@ namespace Org.Apache.REEF.IMRU.Examples var runtimeConfig = YARNClientConfiguration.ConfigurationModule .Build(); - return Configurations.Merge(runtimeConfig, imruClientConfig, GetTcpConfiguration()); + return Configurations.Merge(runtimeConfig, imruClientConfig, GetTcpConnectionConfiguration()); } - private static IConfiguration GetTcpConfiguration() + private static IConfiguration GetTcpConnectionConfiguration() { return TcpClientConfigurationModule.ConfigurationModule - .Set(TcpClientConfigurationModule.MaxConnectionRetry, "200") - .Set(TcpClientConfigurationModule.SleepTime, "1000") + .Set(TcpClientConfigurationModule.MaxConnectionRetry, "300") + .Set(TcpClientConfigurationModule.SleepTime, "2000") .Build(); } } http://git-wip-us.apache.org/repos/asf/reef/blob/977965c2/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/IMRUCloseTaskTest.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/IMRUCloseTaskTest.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/IMRUCloseTaskTest.cs index 225ae42..01a80fe 100644 --- a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/IMRUCloseTaskTest.cs +++ b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/IMRUCloseTaskTest.cs @@ -21,9 +21,7 @@ using System.Diagnostics; 
using Org.Apache.REEF.Driver.Evaluator; using Org.Apache.REEF.Driver.Task; using Org.Apache.REEF.IMRU.OnREEF.Driver; -using Org.Apache.REEF.Network; using Org.Apache.REEF.Tang.Annotations; -using Org.Apache.REEF.Tang.Implementations.Configuration; using Org.Apache.REEF.Tang.Interface; using Org.Apache.REEF.Tang.Util; using Org.Apache.REEF.Utilities; @@ -119,36 +117,6 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU } /// <summary> - /// Mapper function configuration. Add TcpConfiguration to the base configuration - /// </summary> - /// <returns></returns> - protected override IConfiguration BuildMapperFunctionConfig() - { - return Configurations.Merge(GetTcpConfiguration(), base.BuildMapperFunctionConfig()); - } - - /// <summary> - /// Update function configuration. Add TcpConfiguration to the base configuration. - /// </summary> - /// <returns></returns> - protected override IConfiguration BuildUpdateFunctionConfigModule() - { - return Configurations.Merge(GetTcpConfiguration(), base.BuildUpdateFunctionConfigModule()); - } - - /// <summary> - /// Override default setting for retry policy - /// </summary> - /// <returns></returns> - private IConfiguration GetTcpConfiguration() - { - return TcpClientConfigurationModule.ConfigurationModule - .Set(TcpClientConfigurationModule.MaxConnectionRetry, "5") - .Set(TcpClientConfigurationModule.SleepTime, "1000") - .Build(); - } - - /// <summary> /// Test handlers /// </summary> internal sealed class TestHandlers : IObserver<IRunningTask>, IObserver<ICompletedTask>, IObserver<IFailedTask>, IObserver<IFailedEvaluator> http://git-wip-us.apache.org/repos/asf/reef/blob/977965c2/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluators.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluators.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluators.cs index ebeb2c6..1898066 100644 --- 
a/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluators.cs +++ b/lang/cs/Org.Apache.REEF.Tests/Functional/IMRU/TestFailMapperEvaluators.cs @@ -21,7 +21,6 @@ using Org.Apache.REEF.IMRU.Examples.PipelinedBroadcastReduce; using Org.Apache.REEF.IMRU.OnREEF.Driver; using Org.Apache.REEF.IMRU.OnREEF.IMRUTasks; using Org.Apache.REEF.IMRU.OnREEF.Parameters; -using Org.Apache.REEF.Network; using Org.Apache.REEF.Tang.Implementations.Configuration; using Org.Apache.REEF.Tang.Implementations.Tang; using Org.Apache.REEF.Tang.Interface; @@ -184,7 +183,7 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU .BindNamedParameter(typeof(PipelinedBroadcastAndReduceWithFaultTolerant.TotalNumberOfForcedFailures), NumberOfRetry.ToString()) .Build(); - return Configurations.Merge(c1, c2, GetTcpConfiguration()); + return Configurations.Merge(c1, c2); } /// <summary> @@ -193,24 +192,10 @@ namespace Org.Apache.REEF.Tests.Functional.IMRU /// <returns></returns> protected override IConfiguration BuildUpdateFunctionConfigModule() { - var c = IMRUUpdateConfiguration<int[], int[], int[]>.ConfigurationModule + return IMRUUpdateConfiguration<int[], int[], int[]>.ConfigurationModule .Set(IMRUUpdateConfiguration<int[], int[], int[]>.UpdateFunction, GenericType<PipelinedBroadcastAndReduceWithFaultTolerant.BroadcastSenderReduceReceiverUpdateFunctionFT>.Class) .Build(); - - return Configurations.Merge(c, GetTcpConfiguration()); - } - - /// <summary> - /// Override default setting for retry policy - /// </summary> - /// <returns></returns> - private IConfiguration GetTcpConfiguration() - { - return TcpClientConfigurationModule.ConfigurationModule - .Set(TcpClientConfigurationModule.MaxConnectionRetry, "200") - .Set(TcpClientConfigurationModule.SleepTime, "1000") - .Build(); } /// <summary> http://git-wip-us.apache.org/repos/asf/reef/blob/977965c2/lang/cs/Org.Apache.REEF.Tests/Functional/ReefFunctionalTest.cs ---------------------------------------------------------------------- diff --git 
a/lang/cs/Org.Apache.REEF.Tests/Functional/ReefFunctionalTest.cs b/lang/cs/Org.Apache.REEF.Tests/Functional/ReefFunctionalTest.cs index 53d9e7b..fee7ac3 100644 --- a/lang/cs/Org.Apache.REEF.Tests/Functional/ReefFunctionalTest.cs +++ b/lang/cs/Org.Apache.REEF.Tests/Functional/ReefFunctionalTest.cs @@ -28,6 +28,7 @@ using Microsoft.WindowsAzure.Storage.Blob; using Org.Apache.REEF.Client.API; using Org.Apache.REEF.Client.Local; using Org.Apache.REEF.Client.Yarn; +using Org.Apache.REEF.Network; using Org.Apache.REEF.Tang.Implementations.Configuration; using Org.Apache.REEF.Tang.Implementations.Tang; using Org.Apache.REEF.Tang.Interface; @@ -380,20 +381,29 @@ namespace Org.Apache.REEF.Tests.Functional { case Local: var dir = Path.Combine(".", runtimeFolder); - return LocalRuntimeClientConfiguration.ConfigurationModule + var localClientConfig = LocalRuntimeClientConfiguration.ConfigurationModule .Set(LocalRuntimeClientConfiguration.NumberOfEvaluators, numberOfEvaluator.ToString()) .Set(LocalRuntimeClientConfiguration.RuntimeFolder, dir) .Build(); + return Configurations.Merge(localClientConfig, GetTcpConnectionConfiguration()); case YARN: var yarnClientConfig = YARNClientConfiguration.ConfigurationModule.Build(); var tcpPortConfig = TcpPortConfigurationModule.ConfigurationModule .Set(TcpPortConfigurationModule.PortRangeStart, PortRangeStart) .Set(TcpPortConfigurationModule.PortRangeCount, PortRangeCount) .Build(); - return Configurations.Merge(yarnClientConfig, tcpPortConfig); + return Configurations.Merge(yarnClientConfig, tcpPortConfig, GetTcpConnectionConfiguration()); default: throw new Exception("Unknown runtime: " + runOnYarn); } } + + private IConfiguration GetTcpConnectionConfiguration() + { + return TcpClientConfigurationModule.ConfigurationModule + .Set(TcpClientConfigurationModule.MaxConnectionRetry, "150") + .Set(TcpClientConfigurationModule.SleepTime, "1000") + .Build(); + } } } \ No newline at end of file
