[ https://issues.apache.org/jira/browse/YARN-3642?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14544462#comment-14544462 ]

Lee Hounshell commented on YARN-3642:
-------------------------------------

Here is a copy of our yarn-site.xml:

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

   <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
   </property>

   <property>
      <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
      <value>org.apache.hadoop.mapred.ShuffleHandler</value>
   </property>

   <property>
      <name>yarn.resourcemanager.hostname</name>
      <value>qadoop-nn001.apsalar.com</value>
   </property>

   <property>
      <name>yarn.resourcemanager.scheduler.address</name>
      <value>qadoop-nn001.apsalar.com:8030</value>
   </property>

   <property>
      <name>yarn.resourcemanager.address</name>
      <value>qadoop-nn001.apsalar.com:8032</value>
   </property>

   <property>
      <name>yarn.resourcemanager.webapp.address</name>
      <value>qadoop-nn001.apsalar.com:8088</value>
   </property>

   <property>
      <name>yarn.resourcemanager.resource-tracker.address</name>
      <value>qadoop-nn001.apsalar.com:8031</value>
   </property>

   <property>
      <name>yarn.resourcemanager.admin.address</name>
      <value>qadoop-nn001.apsalar.com:8033</value>
   </property>

   <property>
      <name>yarn.log-aggregation-enable</name>
      <value>true</value>
   </property>

   <property>
      <description>Where to aggregate logs to.</description>
      <name>yarn.nodemanager.remote-app-log-dir</name>
      <value>/var/log/hadoop/apps</value>
   </property>

   <property>
      <name>yarn.web-proxy.address</name>
      <value>qadoop-nn001.apsalar.com:8088</value>
   </property>

</configuration>
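
As a quick sanity check that this file is the one actually being loaded, here is a minimal sketch (hypothetical class name; it assumes the Hadoop 2.7.0 client jars and the directory holding this yarn-site.xml are on the classpath) that prints the scheduler address YarnConfiguration resolves and where the value came from:

{code}
import java.net.InetSocketAddress;
import java.util.Arrays;

import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class CheckSchedulerAddress {
  public static void main(String[] args) {
    // Picks up yarn-site.xml / core-site.xml from the classpath, the same way
    // a YARN client JVM would.
    YarnConfiguration conf = new YarnConfiguration();
    InetSocketAddress addr = conf.getSocketAddr(
        YarnConfiguration.RM_SCHEDULER_ADDRESS,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);
    // Expected: qadoop-nn001.apsalar.com:8030 when the file above is read;
    // 0.0.0.0:8030 when the property falls back to the built-in default.
    System.out.println(YarnConfiguration.RM_SCHEDULER_ADDRESS + " = " + addr);
    System.out.println("sources: " + Arrays.toString(
        conf.getPropertySources(YarnConfiguration.RM_SCHEDULER_ADDRESS)));
  }
}
{code}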


> Hadoop2 yarn.resourcemanager.scheduler.address not loaded by RMProxy.java
> -------------------------------------------------------------------------
>
>                 Key: YARN-3642
>                 URL: https://issues.apache.org/jira/browse/YARN-3642
>             Project: Hadoop YARN
>          Issue Type: Bug
>          Components: resourcemanager
>    Affects Versions: 2.7.0
>         Environment: yarn-site.xml:
> <configuration>
>    <property>
>       <name>yarn.nodemanager.aux-services</name>
>       <value>mapreduce_shuffle</value>
>    </property>
>    <property>
>       <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
>       <value>org.apache.hadoop.mapred.ShuffleHandler</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.hostname</name>
>       <value>qadoop-nn001.apsalar.com</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.scheduler.address</name>
>       <value>qadoop-nn001.apsalar.com:8030</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.address</name>
>       <value>qadoop-nn001.apsalar.com:8032</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.webapp.address</name>
>       <value>qadoop-nn001.apsalar.com:8088</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.resource-tracker.address</name>
>       <value>qadoop-nn001.apsalar.com:8031</value>
>    </property>
>    <property>
>       <name>yarn.resourcemanager.admin.address</name>
>       <value>qadoop-nn001.apsalar.com:8033</value>
>    </property>
>    <property>
>       <name>yarn.log-aggregation-enable</name>
>       <value>true</value>
>    </property>
>    <property>
>       <description>Where to aggregate logs to.</description>
>       <name>yarn.nodemanager.remote-app-log-dir</name>
>       <value>/var/log/hadoop/apps</value>
>    </property>
>    <property>
>       <name>yarn.web-proxy.address</name>
>       <value>qadoop-nn001.apsalar.com:8088</value>
>    </property>
> </configuration>
> core-site.xml:
> <configuration>
>    <property>
>       <name>fs.defaultFS</name>
>       <value>hdfs://qadoop-nn001.apsalar.com</value>
>    </property>
>    <property>
>       <name>hadoop.proxyuser.hdfs.hosts</name>
>       <value>*</value>
>    </property>
>    <property>
>       <name>hadoop.proxyuser.hdfs.groups</name>
>       <value>*</value>
>    </property>
> </configuration>
> hdfs-site.xml:
> <configuration>
>    <property>
>       <name>dfs.replication</name>
>       <value>2</value>
>    </property>
>    <property>
>       <name>dfs.namenode.name.dir</name>
>       <value>file:/hadoop/nn</value>
>    </property>
>    <property>
>       <name>dfs.datanode.data.dir</name>
>       <value>file:/hadoop/dn/dfs</value>
>    </property>
>    <property>
>       <name>dfs.http.address</name>
>       <value>qadoop-nn001.apsalar.com:50070</value>
>    </property>
>    <property>
>       <name>dfs.secondary.http.address</name>
>       <value>qadoop-nn002.apsalar.com:50090</value>
>    </property>
> </configuration>
> mapred-site.xml:
> <configuration>
>    <property> 
>       <name>mapred.job.tracker</name> 
>       <value>qadoop-nn001.apsalar.com:8032</value> 
>    </property>
>    <property>
>       <name>mapreduce.framework.name</name>
>       <value>yarn</value>
>    </property>
>    <property>
>       <name>mapreduce.jobhistory.address</name>
>       <value>qadoop-nn001.apsalar.com:10020</value>
>       <description>the JobHistoryServer address.</description>
>    </property>
>    <property>  
>       <name>mapreduce.jobhistory.webapp.address</name>  
>       <value>qadoop-nn001.apsalar.com:19888</value>  
>       <description>the JobHistoryServer web address</description>
>    </property>
> </configuration>
> hbase-site.xml:
> <configuration>
>     <property> 
>         <name>hbase.master</name> 
>         <value>qadoop-nn001.apsalar.com:60000</value> 
>     </property> 
>     <property> 
>         <name>hbase.rootdir</name> 
>         <value>hdfs://qadoop-nn001.apsalar.com:8020/hbase</value> 
>     </property> 
>     <property> 
>         <name>hbase.cluster.distributed</name> 
>         <value>true</value> 
>     </property> 
>     <property>
>         <name>hbase.zookeeper.property.dataDir</name>
>         <value>/opt/local/zookeeper</value>
>     </property> 
>     <property>
>         <name>hbase.zookeeper.property.clientPort</name>
>         <value>2181</value> 
>     </property>
>     <property> 
>         <name>hbase.zookeeper.quorum</name> 
>         <value>qadoop-nn001.apsalar.com</value> 
>     </property> 
>     <property> 
>         <name>zookeeper.session.timeout</name> 
>         <value>180000</value> 
>     </property> 
> </configuration>
>            Reporter: Lee Hounshell
>
> There is an issue with Hadoop 2.7.0 where, in distributed operation, the 
> datanode is unable to reach the YARN scheduler. In our yarn-site.xml, we 
> have defined this address to be:
> {code}
>    <property>
>       <name>yarn.resourcemanager.scheduler.address</name>
>       <value>qadoop-nn001.apsalar.com:8030</value>
>    </property>
> {code}
> But when running an Oozie job, the problem manifests in the job logs for 
> the YARN container. We see log entries similar to the following, showing 
> the connection problem:
> {quote}
> Showing 4096 bytes.
> [main] org.apache.hadoop.http.HttpServer2: Jetty bound to port 64065
> 2015-05-13 17:49:33,930 INFO [main] org.mortbay.log: jetty-6.1.26
> 2015-05-13 17:49:33,971 INFO [main] org.mortbay.log: Extract jar:file:/opt/local/hadoop/hadoop-2.7.0/share/hadoop/yarn/hadoop-yarn-common-2.7.0.jar!/webapps/mapreduce to /var/tmp/Jetty_0_0_0_0_64065_mapreduce____.1ayyhk/webapp
> 2015-05-13 17:49:34,234 INFO [main] org.mortbay.log: Started HttpServer2$SelectChannelConnectorWithSafeStartup@0.0.0.0:64065
> 2015-05-13 17:49:34,234 INFO [main] org.apache.hadoop.yarn.webapp.WebApps: Web app /mapreduce started at 64065
> 2015-05-13 17:49:34,645 INFO [main] org.apache.hadoop.yarn.webapp.WebApps: Registered webapp guice modules
> 2015-05-13 17:49:34,651 INFO [main] org.apache.hadoop.ipc.CallQueueManager: Using callQueue class java.util.concurrent.LinkedBlockingQueue
> 2015-05-13 17:49:34,652 INFO [Socket Reader #1 for port 38927] org.apache.hadoop.ipc.Server: Starting Socket Reader #1 for port 38927
> 2015-05-13 17:49:34,660 INFO [IPC Server Responder] org.apache.hadoop.ipc.Server: IPC Server Responder: starting
> 2015-05-13 17:49:34,660 INFO [IPC Server listener on 38927] org.apache.hadoop.ipc.Server: IPC Server listener on 38927: starting
> 2015-05-13 17:49:34,700 INFO [main] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor: nodeBlacklistingEnabled:true
> 2015-05-13 17:49:34,700 INFO [main] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor: maxTaskFailuresPerNode is 3
> 2015-05-13 17:49:34,700 INFO [main] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor: blacklistDisablePercent is 33
> 2015-05-13 17:49:34,775 INFO [main] org.apache.hadoop.yarn.client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8030
> 2015-05-13 17:49:35,820 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:36,821 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:37,823 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 2 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:38,824 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 3 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:39,825 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 4 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:40,826 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 5 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:41,827 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 6 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:42,828 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 7 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:43,829 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 8 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> 2015-05-13 17:49:44,830 INFO [main] org.apache.hadoop.ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8030. Already tried 9 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
> {quote}
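> The address in the final RMProxy line, /0.0.0.0:8030, is not configured anywhere 
> in our files; it matches YARN's compiled-in default for the scheduler address, 
> which appears to be what the application master falls back to when the property 
> is missing from the Configuration it is handed. A minimal illustration (a sketch 
> only, assuming the Hadoop 2.7.0 YARN jars):
> {code}
> import org.apache.hadoop.yarn.conf.YarnConfiguration;
> 
> public class DefaultSchedulerAddress {
>   public static void main(String[] args) {
>     // Prints "0.0.0.0:8030" -- the compiled-in fallback used when
>     // yarn.resourcemanager.scheduler.address is absent from the conf.
>     System.out.println(YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS);
>   }
> }
> {code}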
> To prove the problem, we have patched the file:
> {code}
> hadoop-2.7.0/src/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java
> {code}
> so that we now "inject" the yarn.resourcemanager.scheduler.address directly 
> into the configuration.
> The modified code looks like this:
> {code}
>   @Private
>   protected static <T> T createRMProxy(final Configuration configuration,
>       final Class<T> protocol, RMProxy instance) throws IOException {
>     YarnConfiguration conf = (configuration instanceof YarnConfiguration)
>         ? (YarnConfiguration) configuration
>         : new YarnConfiguration(configuration);
>     LOG.info("LEE: changing the conf to include 
> yarn.resourcemanager.scheduler.address at 10.1.26.1");
>     conf.set("yarn.resourcemanager.scheduler.address", "10.1.26.1");
>     RetryPolicy retryPolicy = createRetryPolicy(conf);
>     if (HAUtil.isHAEnabled(conf)) {
>       RMFailoverProxyProvider<T> provider =
>           instance.createRMFailoverProxyProvider(conf, protocol);
>       return (T) RetryProxy.create(protocol, provider, retryPolicy);
>     } else {
>       InetSocketAddress rmAddress = instance.getRMAddress(conf, protocol);
>       LOG.info("LEE: Connecting to ResourceManager at " + rmAddress);
>       T proxy = RMProxy.<T>getProxy(conf, protocol, rmAddress);
>       return (T) RetryProxy.create(protocol, proxy, retryPolicy);
>     }
>   }
> {code}
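> For comparison, the same address can be injected without patching Hadoop by setting 
> the property on the submitting job's Configuration, so that it reaches the application 
> master through job.xml. This is only a sketch (hypothetical driver code, not part of 
> the patch above), using the host:port form of the address:
> {code}
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.yarn.conf.YarnConfiguration;
> 
> public class SubmitWithSchedulerAddress {
>   public static void main(String[] args) throws Exception {
>     Configuration conf = new Configuration();
>     // Same effect as the patched RMProxy above, but scoped to this one job:
>     // the property is serialized into job.xml and read by the AM.
>     conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "qadoop-nn001.apsalar.com:8030");
>     Job job = Job.getInstance(conf, "scheduler-address-check");
>     // ... configure mapper/reducer/input/output here, then job.submit() ...
>   }
> }
> {code}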



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
