Repository: hadoop
Updated Branches:
  refs/heads/branch-2 be2334ba3 -> 6772c3f4d


YARN-2019. Retrospect on decision of making RM crashed if any exception throw 
in ZKRMStateStore. Contributed by Jian He.
(cherry picked from commit ee98d6354bbbcd0832d3e539ee097f837e5d0e31)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6772c3f4
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6772c3f4
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6772c3f4

Branch: refs/heads/branch-2
Commit: 6772c3f4ddfc14645f26497d346708e910711366
Parents: be2334b
Author: Junping Du <junping...@apache.org>
Authored: Wed Jul 22 17:52:35 2015 -0700
Committer: Junping Du <junping...@apache.org>
Committed: Wed Jul 22 17:57:16 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                     |  3 +++
 .../apache/hadoop/yarn/conf/YarnConfiguration.java  | 11 +++++++++++
 .../src/main/resources/yarn-default.xml             | 16 ++++++++++++++++
 .../resourcemanager/recovery/RMStateStore.java      |  9 +++++++--
 4 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/6772c3f4/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index be74800..7905676 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -89,6 +89,9 @@ Release 2.8.0 - UNRELEASED
     YARN-2003. Support for Application priority : Changes in RM and Capacity 
     Scheduler. (Sunil G via wangda)
 
+    YARN-2019. Retrospect on decision of making RM crashed if any exception 
throw 
+    in ZKRMStateStore. (Jian He via junping_du)
+
   IMPROVEMENTS
 
     YARN-644. Basic null check is not performed on passed in arguments before

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6772c3f4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index cdfb393..7d602a6 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -401,6 +401,11 @@ public class YarnConfiguration extends Configuration {
   public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
   public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
 
+  public static final String YARN_FAIL_FAST = YARN_PREFIX + "fail-fast";
+  public static final boolean DEFAULT_YARN_FAIL_FAST = true;
+
+  public static final String RM_FAIL_FAST = RM_PREFIX + "fail-fast";
+
   @Private
   public static final String RM_WORK_PRESERVING_RECOVERY_ENABLED = RM_PREFIX
       + "work-preserving-recovery.enabled";
@@ -2018,6 +2023,12 @@ public class YarnConfiguration extends Configuration {
             YARN_HTTP_POLICY_DEFAULT));
   }
 
+  public static boolean shouldRMFailFast(Configuration conf) {
+    return conf.getBoolean(YarnConfiguration.RM_FAIL_FAST,
+        conf.getBoolean(YarnConfiguration.YARN_FAIL_FAST,
+            YarnConfiguration.DEFAULT_YARN_FAIL_FAST));
+  }
+
   @Private
   public static String getClusterId(Configuration conf) {
     String clusterId = conf.get(YarnConfiguration.RM_CLUSTER_ID);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6772c3f4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index d586f51..8b3a3af 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -324,6 +324,22 @@
   </property>
 
   <property>
+    <description>Should RM fail fast if it encounters any errors. By defalt, it
+      points to ${yarn.fail-fast}. Errors include:
+      1) exceptions when state-store write/read operations fails.
+    </description>
+    <name>yarn.resourcemanager.fail-fast</name>
+    <value>${yarn.fail-fast}</value>
+  </property>
+
+  <property>
+    <description>Should YARN fail fast if it encounters any errors.
+    </description>
+    <name>yarn.fail-fast</name>
+    <value>true</value>
+  </property>
+
+  <property>
     <description>Enable RM work preserving recovery. This configuration is 
private
     to YARN for experimenting the feature.
     </description>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6772c3f4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index 46c2954..9b17bf7 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -44,6 +44,7 @@ import 
org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import 
org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -855,6 +856,7 @@ public abstract class RMStateStore extends AbstractService {
    * @param failureCause the exception due to which the operation failed
    */
   protected void notifyStoreOperationFailed(Exception failureCause) {
+    LOG.error("State store operation failed ", failureCause);
     if (failureCause instanceof StoreFencedException) {
       updateFencedState();
       Thread standByTransitionThread =
@@ -862,8 +864,11 @@ public abstract class RMStateStore extends AbstractService 
{
       standByTransitionThread.setName("StandByTransitionThread Handler");
       standByTransitionThread.start();
     } else {
-      rmDispatcher.getEventHandler().handle(
-        new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED, 
failureCause));
+      if (YarnConfiguration.shouldRMFailFast(getConfig())) {
+        rmDispatcher.getEventHandler().handle(
+            new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED,
+                failureCause));
+      }
     }
   }
  

Reply via email to