[ 
https://issues.apache.org/jira/browse/HDFS-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17650973#comment-17650973
 ] 

ASF GitHub Bot commented on HDFS-16831:
---------------------------------------

simbadzina commented on code in PR #5098:
URL: https://github.com/apache/hadoop/pull/5098#discussion_r1054720868


##########
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestNamenodeResolver.java:
##########
@@ -90,6 +90,83 @@ public void setup() throws IOException, InterruptedException 
{
     assertTrue(cleared);
   }
 
+  @Test
+  public void testShuffleObserverNNs() throws Exception {
+    // Add an active entry to the store
+    NamenodeStatusReport activeReport = createNamenodeReport(
+        NAMESERVICES[0], NAMENODES[0], HAServiceState.ACTIVE);
+    assertTrue(namenodeResolver.registerNamenode(activeReport));
+
+    // Add a standby entry to the store
+    NamenodeStatusReport standbyReport = createNamenodeReport(
+        NAMESERVICES[0], NAMENODES[1], HAServiceState.STANDBY);
+    assertTrue(namenodeResolver.registerNamenode(standbyReport));
+
+    // Load cache
+    stateStore.refreshCaches(true);
+
+    // Get namenodes from state store.
+    List<? extends FederationNamenodeContext> withoutObserver =
+        namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(2, withoutObserver.size());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
withoutObserver.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
withoutObserver.get(1).getState());
+
+    // Get namenodes from cache.
+    withoutObserver = 
namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(2, withoutObserver.size());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
withoutObserver.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
withoutObserver.get(1).getState());
+
+    // Add an observer entry to the store
+    NamenodeStatusReport observerReport1 = createNamenodeReport(
+        NAMESERVICES[0], NAMENODES[2], HAServiceState.OBSERVER);
+    assertTrue(namenodeResolver.registerNamenode(observerReport1));
+
+    // Load cache
+    stateStore.refreshCaches(true);
+
+    // Get namenodes from state store.
+    List<? extends FederationNamenodeContext> observerList =
+        namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(3, observerList.size());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
observerList.get(1).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
observerList.get(2).getState());
+
+    // Get namenodes from cache.
+    observerList = 
namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(3, observerList.size());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
observerList.get(1).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
observerList.get(2).getState());
+
+    // Add one new observer entry to the store
+    NamenodeStatusReport observerReport2 = createNamenodeReport(
+        NAMESERVICES[0], NAMENODES[3], HAServiceState.OBSERVER);
+    assertTrue(namenodeResolver.registerNamenode(observerReport2));
+
+    // Load cache
+    stateStore.refreshCaches(true);
+
+    // Get namenodes from state store.
+    List<? extends FederationNamenodeContext> observerList2 =
+        namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(4, observerList2.size());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList2.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList2.get(1).getState());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
observerList2.get(2).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
observerList2.get(3).getState());
+
+    // Get namenodes from cache.
+    observerList2 = 
namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], true);
+    assertEquals(4, observerList2.size());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList2.get(0).getState());
+    assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList2.get(1).getState());
+    assertEquals(FederationNamenodeServiceState.ACTIVE, 
observerList2.get(2).getState());
+    assertEquals(FederationNamenodeServiceState.STANDBY, 
observerList2.get(3).getState());

Review Comment:
   To test the shuffling, one approach would be to call 
namenodeResolver.getNamenodesForNameserviceId() 100 times in a loop and check 
if ever the two observers swap positions in the list. If there is never a swap 
we fail the test.
   
   The test will be probabilistic but the chance of it flaking will be 
extremely low 0.5^100. 
   
   > List<? extends FederationNamenodeContext> observerList3 ;
   >     for(int i = 0; i < 100; i++) {
   >       observerList3 =
   >           namenodeResolver.getNamenodesForNameserviceId(NAMESERVICES[0], 
true);
   >       assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList3.get(0).getState());
   >       assertEquals(FederationNamenodeServiceState.OBSERVER, 
observerList3.get(1).getState());
   >       if 
(observerList3.get(0).getNamenodeId().equals(observerList2.get(1).getNamenodeId())
 &&
   >           
observerList3.get(1).getNamenodeId().equals(observerList2.get(0).getNamenodeId()))
 {
   >         return;
   >       }
   >     }
   >     Assert.fail("Observer order never changed.");





> [RBF SBN] GetNamenodesForNameserviceId should shuffle Observer NameNodes 
> every time
> -----------------------------------------------------------------------------------
>
>                 Key: HDFS-16831
>                 URL: https://issues.apache.org/jira/browse/HDFS-16831
>             Project: Hadoop HDFS
>          Issue Type: Bug
>            Reporter: ZanderXu
>            Assignee: ZanderXu
>            Priority: Major
>              Labels: pull-request-available
>
> The method getNamenodesForNameserviceId in MembershipNamenodeResolver.class 
> should shuffle Observer NameNodes every time. The current logic will return 
> the cached list and will caused all of read requests are forwarding to the 
> first observer namenode. 
>  
> The related code as bellow:
> {code:java}
> @Override
> public List<? extends FederationNamenodeContext> getNamenodesForNameserviceId(
>     final String nsId, boolean listObserversFirst) throws IOException {
>   List<? extends FederationNamenodeContext> ret = cacheNS.get(Pair.of(nsId, 
> listObserversFirst));
>   if (ret != null) {
>     return ret;
>   } 
>   ...
> }{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to