[ 
https://issues.apache.org/jira/browse/HDFS-16844?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17634620#comment-17634620
 ] 

ASF GitHub Bot commented on HDFS-16844:
---------------------------------------

goiri commented on code in PR #5138:
URL: https://github.com/apache/hadoop/pull/5138#discussion_r1023366622


##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java:
##########
@@ -125,7 +125,6 @@ public boolean loadCache(boolean force) throws IOException {
       } catch (IOException e) {
         LOG.error("Cannot get \"{}\" records from the State Store",
             getRecordClass().getSimpleName());
-        this.initialized = false;

Review Comment:
   Don't we want to keep this?



##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java:
##########
@@ -82,4 +96,141 @@ public void testSerialization() throws IOException {
 
     validateRecord(newRecord);
   }
+
+  /**
+   * A mock StateStoreDriver that runs in memory and can cause errors.
+   */
+  public static class MockStateStoreDriver extends StateStoreBaseImpl {
+    boolean giveErrors = false;
+    boolean initialized = false;
+    Map<String, Map<String, BaseRecord>> valueMap = new HashMap<>();
+
+    @Override
+    public boolean initDriver() {
+      initialized = true;
+      return true;
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean initRecordStorage(String className,
+                                                            Class<T> clazz) {
+      return true;
+    }
+
+    @Override
+    public boolean isDriverReady() {
+      return initialized;
+    }
+
+    @Override
+    public void close() throws Exception {
+      valueMap.clear();
+      initialized = false;
+    }
+
+    private void checkErrors() throws IOException {
+      if (giveErrors) {
+        throw new IOException("Induced errors");
+      }
+    }
+
+    @Override
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    public <T extends BaseRecord> QueryResult get(Class<T> clazz) throws 
IOException {
+      checkErrors();
+      Map<String, BaseRecord> map = 
valueMap.get(StateStoreUtils.getRecordName(clazz));
+      List<BaseRecord> results = map != null
+          ? new ArrayList<>(map.values()) : new ArrayList<>();
+      return new QueryResult<>(results, System.currentTimeMillis());
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean putAll(List<T> records,
+                                                 boolean allowUpdate,
+                                                 boolean errorIfExists)
+        throws IOException {
+      checkErrors();
+      for (T record: records) {
+        Map<String, BaseRecord> map =
+            
valueMap.computeIfAbsent(StateStoreUtils.getRecordName(record.getClass()),
+                k -> new HashMap<>());
+        String key = record.getPrimaryKey();
+        BaseRecord oldRecord = map.get(key);
+        if (oldRecord == null || allowUpdate) {
+          map.put(key, record);
+        } else if (errorIfExists) {
+          throw new IOException("Record already exists for " + 
record.getClass()
+              + ": " + key);
+        }
+      }
+      return true;
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean removeAll(Class<T> clazz) throws 
IOException {
+      checkErrors();
+      valueMap.remove(StateStoreUtils.getRecordName(clazz));

Review Comment:
   Should we return whether something was actually removed, i.e. `return valueMap.remove(...) != null`?



##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java:
##########
@@ -82,4 +96,141 @@ public void testSerialization() throws IOException {
 
     validateRecord(newRecord);
   }
+
+  /**
+   * A mock StateStoreDriver that runs in memory and can cause errors.
+   */
+  public static class MockStateStoreDriver extends StateStoreBaseImpl {

Review Comment:
   Maybe it would be cleaner to put it into a separate file.



##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java:
##########
@@ -82,4 +96,141 @@ public void testSerialization() throws IOException {
 
     validateRecord(newRecord);
   }
+
+  /**
+   * A mock StateStoreDriver that runs in memory and can cause errors.
+   */
+  public static class MockStateStoreDriver extends StateStoreBaseImpl {
+    boolean giveErrors = false;
+    boolean initialized = false;
+    Map<String, Map<String, BaseRecord>> valueMap = new HashMap<>();
+
+    @Override
+    public boolean initDriver() {
+      initialized = true;
+      return true;
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean initRecordStorage(String className,
+                                                            Class<T> clazz) {
+      return true;
+    }
+
+    @Override
+    public boolean isDriverReady() {
+      return initialized;
+    }
+
+    @Override
+    public void close() throws Exception {
+      valueMap.clear();
+      initialized = false;
+    }
+
+    private void checkErrors() throws IOException {
+      if (giveErrors) {
+        throw new IOException("Induced errors");
+      }
+    }
+
+    @Override
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    public <T extends BaseRecord> QueryResult get(Class<T> clazz) throws 
IOException {
+      checkErrors();
+      Map<String, BaseRecord> map = 
valueMap.get(StateStoreUtils.getRecordName(clazz));
+      List<BaseRecord> results = map != null

Review Comment:
   With the new 100-character line limit, you could fit this on a single line.
   The same applies in a couple more places.



##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestRouterState.java:
##########
@@ -82,4 +96,141 @@ public void testSerialization() throws IOException {
 
     validateRecord(newRecord);
   }
+
+  /**
+   * A mock StateStoreDriver that runs in memory and can cause errors.
+   */
+  public static class MockStateStoreDriver extends StateStoreBaseImpl {
+    boolean giveErrors = false;
+    boolean initialized = false;
+    Map<String, Map<String, BaseRecord>> valueMap = new HashMap<>();
+
+    @Override
+    public boolean initDriver() {
+      initialized = true;
+      return true;
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean initRecordStorage(String className,
+                                                            Class<T> clazz) {
+      return true;
+    }
+
+    @Override
+    public boolean isDriverReady() {
+      return initialized;
+    }
+
+    @Override
+    public void close() throws Exception {
+      valueMap.clear();
+      initialized = false;
+    }
+
+    private void checkErrors() throws IOException {
+      if (giveErrors) {
+        throw new IOException("Induced errors");
+      }
+    }
+
+    @Override
+    @SuppressWarnings({"rawtypes", "unchecked"})
+    public <T extends BaseRecord> QueryResult get(Class<T> clazz) throws 
IOException {
+      checkErrors();
+      Map<String, BaseRecord> map = 
valueMap.get(StateStoreUtils.getRecordName(clazz));
+      List<BaseRecord> results = map != null
+          ? new ArrayList<>(map.values()) : new ArrayList<>();
+      return new QueryResult<>(results, System.currentTimeMillis());
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean putAll(List<T> records,
+                                                 boolean allowUpdate,
+                                                 boolean errorIfExists)
+        throws IOException {
+      checkErrors();
+      for (T record: records) {
+        Map<String, BaseRecord> map =
+            
valueMap.computeIfAbsent(StateStoreUtils.getRecordName(record.getClass()),
+                k -> new HashMap<>());
+        String key = record.getPrimaryKey();
+        BaseRecord oldRecord = map.get(key);
+        if (oldRecord == null || allowUpdate) {
+          map.put(key, record);
+        } else if (errorIfExists) {
+          throw new IOException("Record already exists for " + 
record.getClass()
+              + ": " + key);
+        }
+      }
+      return true;
+    }
+
+    @Override
+    public <T extends BaseRecord> boolean removeAll(Class<T> clazz) throws 
IOException {
+      checkErrors();
+      valueMap.remove(StateStoreUtils.getRecordName(clazz));
+      return true;
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public <T extends BaseRecord> int remove(Class<T> clazz,
+                                             Query<T> query)
+        throws IOException {
+      checkErrors();
+      int result = 0;
+      Map<String, BaseRecord> map =
+          valueMap.get(StateStoreUtils.getRecordName(clazz));
+      if (map != null) {
+        for (Iterator<BaseRecord> itr = map.values().iterator(); 
itr.hasNext(); ) {
+          BaseRecord record = itr.next();
+          if (query.matches((T) record)) {
+            itr.remove();
+            result += 1;
+          }
+        }
+      }
+      return result;
+    }
+  }
+
+  @Test
+  public void testStateStoreResilience() throws Exception {
+    StateStoreService service = new StateStoreService();
+    Configuration conf = new Configuration();
+    conf.setClass(RBFConfigKeys.FEDERATION_STORE_DRIVER_CLASS,
+        MockStateStoreDriver.class,

Review Comment:
   Wouldn't it make sense to just use StateStoreFileImpl?





> [RBF] The routers should be resilient against exceptions from StateStore
> ------------------------------------------------------------------------
>
>                 Key: HDFS-16844
>                 URL: https://issues.apache.org/jira/browse/HDFS-16844
>             Project: Hadoop HDFS
>          Issue Type: Improvement
>          Components: rbf
>    Affects Versions: 3.3.4
>            Reporter: Owen O'Malley
>            Assignee: Owen O'Malley
>            Priority: Major
>              Labels: pull-request-available
>
> Currently, a single exception from the StateStore will cripple a router by 
> clearing the caches before the replacement is loaded. Since the routers have 
> the information in an in-memory cache, it is better to keep running. There is 
> still the timeout that will push the router into safe-mode if it can't load 
> the state store over a longer period of time.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to