Repository: reef Updated Branches: refs/heads/master b5c807ad6 -> 5ed617298
[REEF-1411] Support multiple RMs and other Hadoop distributions This addressed the issue by * Reading RM web addresses from yarn-site.xml. * Deprecating MultipleRMUrlProvider in favor of YarnConfigurationUrlProvider. * Add a new class `YARNConfiguration` for access to site.xml and such. JIRA: [REEF-1411](https://issues.apache.org/jira/browse/REEF-1411) Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/5ed61729 Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/5ed61729 Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/5ed61729 Branch: refs/heads/master Commit: 5ed617298986b04a51d338adf249cecb6bc603e8 Parents: b5c807a Author: Andrew Chung <[email protected]> Authored: Mon May 16 17:13:14 2016 -0700 Committer: Markus Weimer <[email protected]> Committed: Tue Jun 7 10:52:23 2016 -0700 ---------------------------------------------------------------------- .../MultipleRMUrlProviderTests.cs | 23 +- .../YarnConfigurationUrlProviderTests.cs | 66 ++--- .../YARN/RESTClient/MultipleRMUrlProvider.cs | 44 +--- .../RESTClient/YarnConfigurationUrlProvider.cs | 74 ++---- .../DefaultYarnClusterHttpDriverConnection.cs | 36 ++- .../Org.Apache.Reef.Utilities.csproj | 3 + .../Runtime/Yarn/HttpConfig.cs | 28 +++ .../Runtime/Yarn/YarnConfiguration.cs | 249 +++++++++++++++++++ 8 files changed, 376 insertions(+), 147 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Client.Tests/MultipleRMUrlProviderTests.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Client.Tests/MultipleRMUrlProviderTests.cs b/lang/cs/Org.Apache.REEF.Client.Tests/MultipleRMUrlProviderTests.cs index 364234d..01923a5 100644 --- a/lang/cs/Org.Apache.REEF.Client.Tests/MultipleRMUrlProviderTests.cs +++ b/lang/cs/Org.Apache.REEF.Client.Tests/MultipleRMUrlProviderTests.cs @@ 
-18,6 +18,7 @@ using System; using System.Globalization; using System.IO; +using System.Threading.Tasks; using Org.Apache.REEF.Client.Yarn.RestClient; using Org.Apache.REEF.Client.YARN.RestClient; using Org.Apache.REEF.Tang.Exceptions; @@ -41,6 +42,14 @@ namespace Org.Apache.REEF.Client.Tests <!-- Put site-specific property overrides in this file. --> <configuration xmlns:xi=""http://www.w3.org/2001/XInclude""> <property> + <name>yarn.resourcemanager.ha.enabled</name> + <value>TRUE</value> + </property> + <property> + <name>yarn.resourcemanager.ha.rm-ids</name> + <value>rm1,rm2</value> + </property> + <property> <name>yarn.resourcemanager.webapp.address.rm1</name> <value>" + AnyHttpAddressConfig + @"</value> </property> @@ -89,23 +98,15 @@ namespace Org.Apache.REEF.Client.Tests } [Fact] - public void CannotFindHadoopConfigDirThrowsArgumentException() + public void CannotFindHadoopConfigDirThrowsException() { using (new YarnConfigurationUrlProviderTests.TemporaryOverrideEnvironmentVariable(HadoopConfDirEnvVariable, string.Empty)) { - try - { - IUrlProvider urlProviderNotUsed = GetYarnConfigurationUrlProvider(); - Assert.True(false, "Should throw exception"); - } - catch (InjectionException injectionException) - { - Assert.True(injectionException.GetBaseException() is ArgumentException); - } + Assert.Throws<InjectionException>(() => GetYarnConfigurationUrlProvider()); } } - private IUrlProvider GetYarnConfigurationUrlProvider() + private static IUrlProvider GetYarnConfigurationUrlProvider() { var builder = TangFactory.GetTang().NewConfigurationBuilder() .BindImplementation(GenericType<IUrlProvider>.Class, GenericType<MultipleRMUrlProvider>.Class) http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Client.Tests/YarnConfigurationUrlProviderTests.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Client.Tests/YarnConfigurationUrlProviderTests.cs 
b/lang/cs/Org.Apache.REEF.Client.Tests/YarnConfigurationUrlProviderTests.cs index ba42625..cb92087 100644 --- a/lang/cs/Org.Apache.REEF.Client.Tests/YarnConfigurationUrlProviderTests.cs +++ b/lang/cs/Org.Apache.REEF.Client.Tests/YarnConfigurationUrlProviderTests.cs @@ -39,40 +39,26 @@ namespace Org.Apache.REEF.Client.Tests private const string AnyHttpsAddressConfig = @"anyotherhost:9088"; private const string YarnConfigurationXmlContent = @"<?xml version=""1.0""?> -<?xml-stylesheet type=""text/xsl"" href=""configuration.xsl""?> -<!-- Put site-specific property overrides in this file. --> -<configuration xmlns:xi=""http://www.w3.org/2001/XInclude""> - <property> - <name>yarn.resourcemanager.webapp.https.address</name> - <value>" + AnyHttpsAddressConfig + @"</value> - </property> - <property> - <name>yarn.resourcemanager.webapp.address</name> - <value>" + AnyHttpAddressConfig + @"</value> - </property> - <property> - <name>yarn.nodemanager.local-dirs</name> - <value>C:\hdpdata\hadoop\local</value> - </property> -</configuration>"; - - private const string YarnConfigurationXmlContentChanged = @"<?xml version=""1.0""?> -<?xml-stylesheet type=""text/xsl"" href=""configuration.xsl""?> -<!-- Put site-specific property overrides in this file. --> -<configuration xmlns:xi=""http://www.w3.org/2001/XInclude""> - <property> - <name>yarn.resourcemanager.webapp.https.address</name> - <value>" + AnyHttpsAddressConfig + @"</value> - </property> - <property> - <name>yarn.resourcemanager.webapp.address</name> - <value>" + AnyHttpAddressConfigUpdated + @"</value> - </property> - <property> - <name>yarn.nodemanager.local-dirs</name> - <value>C:\hdpdata\hadoop\local</value> - </property> -</configuration>"; + <?xml-stylesheet type=""text/xsl"" href=""configuration.xsl""?> + <!-- Put site-specific property overrides in this file. 
--> + <configuration xmlns:xi=""http://www.w3.org/2001/XInclude""> + <property> + <name>yarn.http.policy</name> + <value>HTTP_AND_HTTPS</value> + </property> + <property> + <name>yarn.resourcemanager.webapp.https.address</name> + <value>" + AnyHttpsAddressConfig + @"</value> + </property> + <property> + <name>yarn.resourcemanager.webapp.address</name> + <value>" + AnyHttpAddressConfigUpdated + @"</value> + </property> + <property> + <name>yarn.nodemanager.local-dirs</name> + <value>C:\hdpdata\hadoop\local</value> + </property> + </configuration>"; [Fact] public void UrlProviderReadsEnvVarConfiguredConfigFileAndParsesCorrectHttpUrl() @@ -137,19 +123,11 @@ namespace Org.Apache.REEF.Client.Tests { using (new TemporaryOverrideEnvironmentVariable(HadoopConfDirEnvVariable, string.Empty)) { - try - { - YarnConfigurationUrlProvider urlProviderNotUsed = GetYarnConfigurationUrlProvider(); - Assert.True(false, "Should throw exception"); - } - catch (InjectionException injectionException) - { - Assert.True(injectionException.GetBaseException() is ArgumentException); - } + Assert.Throws<InjectionException>(() => GetYarnConfigurationUrlProvider()); } } - private YarnConfigurationUrlProvider GetYarnConfigurationUrlProvider( + private static YarnConfigurationUrlProvider GetYarnConfigurationUrlProvider( string anyHadoopConfigDir = null, bool useHttps = false) { http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/MultipleRMUrlProvider.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/MultipleRMUrlProvider.cs b/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/MultipleRMUrlProvider.cs index 4429279..1e941e4 100644 --- a/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/MultipleRMUrlProvider.cs +++ b/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/MultipleRMUrlProvider.cs @@ -17,58 +17,28 @@ using System; using System.Collections.Generic; -using System.IO; 
-using System.Linq; using System.Threading.Tasks; -using System.Xml.Linq; using Org.Apache.REEF.Client.Yarn.RestClient; using Org.Apache.REEF.Tang.Annotations; -using Org.Apache.REEF.Utilities.Logging; +using Org.Apache.REEF.Utilities.Runtime.Yarn; namespace Org.Apache.REEF.Client.YARN.RestClient { - public class MultipleRMUrlProvider : IUrlProvider + [Obsolete("Deprecated in 0.16. The default implementation of IUrlProvider," + + " YarnConfigurationUrlProvider, provides the same functionality as MultipleRMUrlProvider.")] + public sealed class MultipleRMUrlProvider : IUrlProvider { - private const string RmConfigKeyPrefix = "yarn.resourcemanager.webapp.address.rm"; - private static readonly string HadoopConfDirEnvVariable = "HADOOP_CONF_DIR"; - private static readonly string YarnConfigFileName = "yarn-site.xml"; - private static readonly Logger Logger = Logger.GetLogger(typeof(MultipleRMUrlProvider)); - private IList<Uri> _yarnRmUri; + private readonly YarnConfiguration _yarnConfiguration; [Inject] private MultipleRMUrlProvider() { - var hadoopConfigDir = Environment.GetEnvironmentVariable(HadoopConfDirEnvVariable); - - if (string.IsNullOrEmpty(hadoopConfigDir) || !Directory.Exists(hadoopConfigDir)) - { - throw new ArgumentException(HadoopConfDirEnvVariable + " is not configured or does not exist.", - "hadoopConfigDir"); - } - - Logger.Log(Level.Verbose, "Using {0} as hadoop configuration directory", hadoopConfigDir); - string yarnConfigurationFile = Path.Combine(hadoopConfigDir, YarnConfigFileName); - LoadYarnConfiguration(yarnConfigurationFile); + _yarnConfiguration = YarnConfiguration.GetConfiguration(); } public Task<IEnumerable<Uri>> GetUrlAsync() { - return Task.FromResult((IEnumerable<Uri>)_yarnRmUri); - } - - private void LoadYarnConfiguration(string yarnConfigurationFile) - { - var configRoot = XElement.Load(yarnConfigurationFile); - var address = configRoot.Elements("property") - .Where(x => - 
((string)x.Element("name")).ToUpper().StartsWith(RmConfigKeyPrefix.ToUpper())) - .Select(x => (string)x.Element("value")); - _yarnRmUri = - address.Select(x => x.TrimEnd('/') + @"/") - .Select(x => string.Format("http://{0}", x)) - .Where(x => Uri.IsWellFormedUriString(x, UriKind.Absolute)) - .Select(x => new Uri(x)) - .ToList(); + return Task.FromResult(_yarnConfiguration.GetYarnRMWebappEndpoints()); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/YarnConfigurationUrlProvider.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/YarnConfigurationUrlProvider.cs b/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/YarnConfigurationUrlProvider.cs index d46d73a..092a359 100644 --- a/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/YarnConfigurationUrlProvider.cs +++ b/lang/cs/Org.Apache.REEF.Client/YARN/RESTClient/YarnConfigurationUrlProvider.cs @@ -17,34 +17,44 @@ using System; using System.Collections.Generic; -using System.IO; -using System.Linq; using System.Threading.Tasks; -using System.Xml.Linq; using Org.Apache.REEF.Tang.Annotations; -using Org.Apache.REEF.Utilities.Logging; +using Org.Apache.REEF.Utilities.Runtime.Yarn; namespace Org.Apache.REEF.Client.Yarn.RestClient { - internal class YarnConfigurationUrlProvider : IUrlProvider + /// <summary> + /// The <see cref="IUrlProvider"/> for YARN that reads yarn-site.xml to locate RM web + /// URLs for client job submission. + /// </summary> + public sealed class YarnConfigurationUrlProvider : IUrlProvider { - [NamedParameter("Location of Hadoop configurations.")] - public class HadoopConfigurationDirectory : Name<string> + private readonly YarnConfiguration _yarnConfiguration; + + /// <summary> + /// The location of Hadoop configurations. 
Defaults to a whitespace string, + /// which instructs <see cref="YarnConfigurationUrlProvider"/> to read + /// the environment variable HADOOP_CONF_DIR. + /// </summary> + [NamedParameter("Location of Hadoop configurations.", defaultValue: " ")] + public sealed class HadoopConfigurationDirectory : Name<string> { + private HadoopConfigurationDirectory() + { + } } + /// <summary> + /// Whether the provider should use HTTPS to construct the URI or not. + /// </summary> [NamedParameter("True if HTTPS should be used to construct the URI.", defaultValue: "false")] - public class UseHttpsForYarnCommunication : Name<bool> + public sealed class UseHttpsForYarnCommunication : Name<bool> { + private UseHttpsForYarnCommunication() + { + } } - private static readonly string HadoopConfDirEnvVariable = "HADOOP_CONF_DIR"; - private static readonly string YarnConfigFileName = "yarn-site.xml"; - private static readonly string YarnRmWebappHttpsAddressPropertyName = "yarn.resourcemanager.webapp.https.address"; - private static readonly string YarnRmWebappHttpAddressPropertyName = "yarn.resourcemanager.webapp.address"; - private static readonly Logger Logger = Logger.GetLogger(typeof(YarnConfigurationUrlProvider)); - private IList<Uri> _yarnRmUri; - [Inject] private YarnConfigurationUrlProvider( [Parameter(typeof(UseHttpsForYarnCommunication))] bool useHttps) @@ -57,42 +67,12 @@ namespace Org.Apache.REEF.Client.Yarn.RestClient [Parameter(typeof(HadoopConfigurationDirectory))] string hadoopConfigDir, [Parameter(typeof(UseHttpsForYarnCommunication))] bool useHttps) { - hadoopConfigDir = hadoopConfigDir ?? 
Environment.GetEnvironmentVariable(HadoopConfDirEnvVariable); - - if (string.IsNullOrEmpty(hadoopConfigDir) || !Directory.Exists(hadoopConfigDir)) - { - throw new ArgumentException(HadoopConfDirEnvVariable + " is not configured or does not exist.", - "hadoopConfigDir"); - } - - Logger.Log(Level.Verbose, "Using {0} as hadoop configuration directory", hadoopConfigDir); - string yarnConfigurationFile = Path.Combine(hadoopConfigDir, YarnConfigFileName); - LoadYarnConfiguration(yarnConfigurationFile, useHttps); + _yarnConfiguration = YarnConfiguration.GetConfiguration(hadoopConfigDir, useHttps: useHttps); } public Task<IEnumerable<Uri>> GetUrlAsync() { - return Task.FromResult((IEnumerable<Uri>)_yarnRmUri); - } - - private void LoadYarnConfiguration(string yarnConfigurationFile, bool useHttps) - { - var configRoot = XElement.Load(yarnConfigurationFile); - - string propertyName = useHttps - ? YarnRmWebappHttpsAddressPropertyName - : YarnRmWebappHttpAddressPropertyName; - - string prefix = useHttps ? 
"https" : "http"; - - var address = (string)configRoot.Elements("property") - .Where(x => - (string)x.Element("name") == propertyName) - .Select(x => x.Element("value")).FirstOrDefault(); - - address = address.TrimEnd('/') + @"/"; - - _yarnRmUri = new List<Uri> { new Uri(string.Format("{0}://{1}", prefix, address)) }; + return Task.FromResult(_yarnConfiguration.GetYarnRMWebappEndpoints()); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Common/Evaluator/DefaultYarnClusterHttpDriverConnection.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Common/Evaluator/DefaultYarnClusterHttpDriverConnection.cs b/lang/cs/Org.Apache.REEF.Common/Evaluator/DefaultYarnClusterHttpDriverConnection.cs index 6ae2b72..e1bdf46 100644 --- a/lang/cs/Org.Apache.REEF.Common/Evaluator/DefaultYarnClusterHttpDriverConnection.cs +++ b/lang/cs/Org.Apache.REEF.Common/Evaluator/DefaultYarnClusterHttpDriverConnection.cs @@ -17,17 +17,23 @@ using System; using Org.Apache.REEF.Tang.Annotations; +using Org.Apache.REEF.Utilities.Logging; +using Org.Apache.REEF.Utilities.Runtime.Yarn; namespace Org.Apache.REEF.Common.Evaluator { public sealed class DefaultYarnClusterHttpDriverConnection : IDriverConnection { + private static readonly Logger Logger = Logger.GetLogger(typeof(DefaultYarnClusterHttpDriverConnection)); + private readonly string _applicationId; + private readonly YarnConfiguration _yarnConfiguration; [Inject] private DefaultYarnClusterHttpDriverConnection() { _applicationId = Environment.GetEnvironmentVariable(Constants.ReefYarnApplicationIdEnvironmentVariable); + _yarnConfiguration = YarnConfiguration.GetConfiguration(); } public DriverInformation GetDriverInformation() @@ -37,14 +43,28 @@ namespace Org.Apache.REEF.Common.Evaluator throw new ApplicationException("Could not fetch the application ID from YARN's container environment variables."); } - // e.g., 
http://headnodehost:9014/proxy/application_1407519727821_0012/reef/v1/driver - Uri queryUri = new Uri( - string.Concat( - Constants.HDInsightClusterHttpEndpointBaseUri, - _applicationId + "/", - Constants.HttpReefUriSpecification, - Constants.HttpDriverUriTarget)); - return DriverInformation.GetDriverInformationFromHttp(queryUri); + var yarnRMWebAppEndpoints = _yarnConfiguration.GetYarnRMWebappEndpoints(); + + foreach (var yarnRMWebAppEndpoint in yarnRMWebAppEndpoints) + { + try + { + var queryUri = new Uri( + yarnRMWebAppEndpoint, + "proxy/" + _applicationId + "/" + Constants.HttpReefUriSpecification + Constants.HttpDriverUriTarget); + return DriverInformation.GetDriverInformationFromHttp(queryUri); + } + catch (Exception ex) + { + Utilities.Diagnostics.Exceptions.Caught( + ex, + Level.Info, + "Unable to reach RM at " + yarnRMWebAppEndpoint, + Logger); + } + } + + throw new ApplicationException("Unable to get Driver Information."); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj index c2cd297..7e0e214 100644 --- a/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj +++ b/lang/cs/Org.Apache.REEF.Utilities/Org.Apache.Reef.Utilities.csproj @@ -35,6 +35,7 @@ under the License. </Reference> <Reference Include="System" /> <Reference Include="System.Core" /> + <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="AsyncUtils\LoggingHelper.cs" /> @@ -65,6 +66,8 @@ under the License. 
<Compile Include="NetUtilities.cs" /> <Compile Include="Optional.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> + <Compile Include="Runtime\Yarn\HttpConfig.cs" /> + <Compile Include="Runtime\Yarn\YarnConfiguration.cs" /> <Compile Include="ValidationUtilities.cs" /> </ItemGroup> <ItemGroup> http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/HttpConfig.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/HttpConfig.cs b/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/HttpConfig.cs new file mode 100644 index 0000000..b935fd5 --- /dev/null +++ b/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/HttpConfig.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +using System; + +namespace Org.Apache.REEF.Utilities.Runtime.Yarn +{ + public static class HttpConfig + { + public static readonly string HttpOnlyPolicy = "HTTP_ONLY"; + public static readonly string HttpsOnlyPolicy = "HTTPS_ONLY"; + public static readonly string HttpAndHttpsPolicy = "HTTP_AND_HTTPS"; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/reef/blob/5ed61729/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/YarnConfiguration.cs ---------------------------------------------------------------------- diff --git a/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/YarnConfiguration.cs b/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/YarnConfiguration.cs new file mode 100644 index 0000000..82887a5 --- /dev/null +++ b/lang/cs/Org.Apache.REEF.Utilities/Runtime/Yarn/YarnConfiguration.cs @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Xml; +using Org.Apache.REEF.Utilities.Attributes; +using Org.Apache.REEF.Utilities.Diagnostics; +using Org.Apache.REEF.Utilities.Logging; + +namespace Org.Apache.REEF.Utilities.Runtime.Yarn +{ + [Unstable("0.16. 
Namespace may move.")] + public sealed class YarnConfiguration + { + private static readonly Logger Logger = Logger.GetLogger(typeof(YarnConfiguration)); + + /// <summary> + /// The Hadoop configuration directory environment variable. + /// </summary> + public static readonly string HadoopConfDirEnvVariable = "HADOOP_CONF_DIR"; + + /// <summary> + /// The YARN configuration XML file name. + /// </summary> + public static readonly string YarnConfigFileName = "yarn-site.xml"; + + /// <summary> + /// The RM Web application HTTPS address configuration property. + /// </summary> + public static readonly string RMWebappHttpsAddress = "yarn.resourcemanager.webapp.https.address"; + + /// <summary> + /// The RM Web application address configuration property. + /// </summary> + public static readonly string RMWebappAddress = "yarn.resourcemanager.webapp.address"; + + public static readonly string YARNHttpPolicyKey = "yarn.http.policy"; + + public static readonly string RMHaEnabled = "yarn.resourcemanager.ha.enabled"; + + /// <summary> + /// The RM HA IDs property. + /// </summary> + private const string RMHaIds = "yarn.resourcemanager.ha.rm-ids"; + + private readonly XmlDocument _yarnSiteXmlDoc; + private readonly bool _useHttps; + + /// <summary> + /// Returns a configuration representing the YARN configuration on the cluster. + /// </summary> + /// <param name="hadoopConfDir"> + /// The hadoop configuration directory, defaults to use the environment variable <see cref="HadoopConfDirEnvVariable"/>. + /// </param> + /// <param name="yarnConfigFileName"> + /// The YARN configuration file name, defaults to <see cref="YarnConfigFileName"/>. + /// </param> + /// <param name="useHttps"> + /// Whether or not to use HTTPS, defaults to read from YARN configuration. + /// </param> + public static YarnConfiguration GetConfiguration( + string hadoopConfDir = null, + string yarnConfigFileName = null, + bool? 
useHttps = null) + { + return new YarnConfiguration(hadoopConfDir, yarnConfigFileName, useHttps); + } + + private YarnConfiguration( + string hadoopConfDir, + string yarnConfigFileName, + bool? useHttps) + { + var hadoopConfigDir = string.IsNullOrWhiteSpace(hadoopConfDir) ? + Environment.GetEnvironmentVariable(HadoopConfDirEnvVariable) + : hadoopConfDir; + + yarnConfigFileName = string.IsNullOrWhiteSpace(yarnConfigFileName) ? + YarnConfigFileName + : yarnConfigFileName; + + if (string.IsNullOrEmpty(hadoopConfigDir) || !Directory.Exists(hadoopConfigDir)) + { + throw new ArgumentException(hadoopConfigDir + " is not configured or does not exist."); + } + + Logger.Log(Level.Verbose, "Using {0} as hadoop configuration directory", hadoopConfigDir); + var yarnConfigurationFile = Path.Combine(hadoopConfigDir, yarnConfigFileName); + + _yarnSiteXmlDoc = new XmlDocument(); + _yarnSiteXmlDoc.Load(yarnConfigurationFile); + if (useHttps == null) + { + var httpPolicyStr = GetString(YARNHttpPolicyKey, HttpConfig.HttpOnlyPolicy); + _useHttps = !httpPolicyStr.Equals(HttpConfig.HttpOnlyPolicy); + } + else + { + if (useHttps.Value) + { + var httpPolicyStr = GetString(YARNHttpPolicyKey, HttpConfig.HttpOnlyPolicy); + if (httpPolicyStr.Equals(HttpConfig.HttpOnlyPolicy)) + { + throw new ArgumentException("YARN cluster does not support HTTPS when useHttps is set to true."); + } + } + + _useHttps = useHttps.Value; + } + } + + /// <summary> + /// Gets the YARN RM web application endpoints. + /// </summary> + public IEnumerable<Uri> GetYarnRMWebappEndpoints() + { + if (GetBool(RMHaEnabled)) + { + var rmIds = GetStrings(RMHaIds); + if (rmIds == null || rmIds.Length == 0) + { + throw new ApplicationException("RM HA enabled, but RM IDs were not found."); + } + + var rmIdWebAppEndpoints = new List<Uri>(); + + foreach (var rmId in rmIds) + { + var rmAddrPropertyToUse = _useHttps ? RMWebappHttpsAddress : RMWebappAddress; + var rmWebAppAddressProperty = rmAddrPropertyToUse + "." 
+ rmId; + bool isFound; + var rmWebAppAddressNodeText = GetString(rmWebAppAddressProperty, out isFound); + if (!isFound) + { + continue; + } + + try + { + rmIdWebAppEndpoints.Add(UriFromString(rmWebAppAddressNodeText)); + } + catch (UriFormatException e) + { + Exceptions.Caught(e, + Level.Warning, + "Unable to format " + rmWebAppAddressNodeText + " to URI", + Logger); + } + } + + if (rmIdWebAppEndpoints.Count == 0) + { + throw new ApplicationException("RM HA enabled, but RM IDs were not found."); + } + + return rmIdWebAppEndpoints; + } + + var rmAddressNodeText = GetString(_useHttps ? RMWebappHttpsAddress : RMWebappAddress); + if (string.IsNullOrWhiteSpace(rmAddressNodeText)) + { + throw new ApplicationException("Unable to find RM Webapp Address from yarn-site.xml."); + } + + return new[] { UriFromString(rmAddressNodeText) }; + } + + /// <summary> + /// Gets the bool value of an XML node under + /// /configuration/property with name <paramref name="propertyName"/> in the YARN configuration file. + /// </summary> + /// <remarks> + /// If the property does not exist or is not boolean, returns defaultValue. + /// </remarks> + public bool GetBool(string propertyName, bool defaultValue = false) + { + bool isFound; + var str = GetString(propertyName, out isFound); + + bool value; + if (isFound && bool.TryParse(str, out value)) + { + return value; + } + + return defaultValue; + } + + /// <summary> + /// Gets the text value of an XML node under + /// /configuration/property with name <paramref name="propertyName"/> in the YARN configuration file. + /// </summary> + public string GetString(string propertyName, string defaultValue = null) + { + bool isFound; + var str = GetString(propertyName, out isFound); + return isFound ? str : defaultValue; + } + + /// <summary> + /// Gets the comma delimited text values of an XML node under + /// /configuration/property with name <paramref name="propertyName"/> in the YARN configuration file. 
+ /// </summary> + public string[] GetStrings(string propertyName, string[] defaultValues = null) + { + bool isFound; + var propertyStr = GetString(propertyName, out isFound); + return isFound ? propertyStr.Split(',') : defaultValues; + } + + /// <summary> + /// Gets the text value of an XML node under + /// /configuration/property with name <paramref name="propertyName"/> in the YARN configuration file. + /// </summary> + private string GetString(string propertyName, out bool isFound) + { + var node = _yarnSiteXmlDoc + .SelectSingleNode("/configuration/property[name='" + propertyName + "']/value/text()"); + isFound = node != null; + return node == null ? null : node.Value; + } + + /// <summary> + /// Returns a URI from string. + /// </summary> + private Uri UriFromString(string webAppUriStr) + { + var protocolStr = _useHttps ? "https://" : "http://"; + var text = webAppUriStr.TrimEnd('/') + "/"; + return new Uri(protocolStr + text); + } + } +} \ No newline at end of file
