devmadhuu commented on code in PR #9416:
URL: https://github.com/apache/ozone/pull/9416#discussion_r2592771261


##########
hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryUnifiedControl.java:
##########
@@ -183,215 +264,300 @@ void testInitialState() {
   }
 
   /**
-   * Test successful single rebuild operation.
+   * Test single successful rebuild via queue.
    */
   @Test
   void testSingleSuccessfulRebuild() throws Exception {
-    // Setup successful rebuild
-    // Setup successful rebuild by default - no exception thrown
-    doNothing().when(mockNamespaceSummaryManager).clearNSSummaryTable();
+    AtomicBoolean rebuildExecuted = new AtomicBoolean(false);
+    CountDownLatch rebuildLatch = new CountDownLatch(1);
 
-    // Execute rebuild
-    TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
+    doAnswer(invocation -> {
+      rebuildExecuted.set(true);
+      rebuildLatch.countDown();
+      return null;
+    }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
 
-    // Verify results
-    assertTrue(result.isTaskSuccess(), "Rebuild should succeed");
-    assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
-        "State should return to IDLE after successful rebuild");
-    
-    // Verify interactions
-    verify(mockNamespaceSummaryManager, times(1)).clearNSSummaryTable();
-  }
+    // Queue rebuild via production API
+    ReconTaskController.ReInitializationResult result =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
 
-  /**
-   * Test rebuild failure sets state to FAILED.
-   */
-  @Test
-  void testRebuildFailure() throws IOException {
-    // Setup failure scenario
-    doThrow(new IOException("Test 
failure")).when(mockNamespaceSummaryManager).clearNSSummaryTable();
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result,
+        "Rebuild should be queued successfully");
 
-    // Execute rebuild
-    TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
+    // Wait for async processing
+    assertTrue(rebuildLatch.await(10, TimeUnit.SECONDS),
+        "Rebuild should execute");
+    assertTrue(rebuildExecuted.get(), "Rebuild should have executed");
 
-    // Verify results
-    assertFalse(result.isTaskSuccess(), "Rebuild should fail");
-    assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
-        "State should be FAILED after rebuild failure");
+    // Allow time for state to return to IDLE
+    Thread.sleep(500);
+    assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
+        "State should return to IDLE after successful rebuild");
   }
 
   /**
-   * Test concurrent rebuild attempts - second call should be rejected.
+   * Test rebuild failure sets proper state.
    */
   @Test
-  void testConcurrentRebuildPrevention() throws Exception {
-    CountDownLatch startLatch = new CountDownLatch(1);
-    CountDownLatch finishLatch = new CountDownLatch(1);
-    AtomicBoolean firstRebuildStarted = new AtomicBoolean(false);
-    AtomicBoolean secondRebuildRejected = new AtomicBoolean(false);
+  void testRebuildFailure() throws Exception {
+    CountDownLatch failureLatch = new CountDownLatch(1);
 
-    // Setup first rebuild to block until we signal
     doAnswer(invocation -> {
-      firstRebuildStarted.set(true);
-      startLatch.countDown();
-      // Wait for test to signal completion
-      boolean awaitSuccess = finishLatch.await(10, TimeUnit.SECONDS);
-      if (!awaitSuccess) {
-        LOG.warn("finishLatch.await() timed out");
-      }
-      return null;
+      failureLatch.countDown();
+      throw new IOException("Test failure");
     }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
 
-    ExecutorService executor = Executors.newFixedThreadPool(2);
+    ReconTaskController.ReInitializationResult result =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
 
-    try {
-      // Start first rebuild asynchronously
-      CompletableFuture<TaskResult> firstRebuild = 
CompletableFuture.supplyAsync(() -> {
-        LOG.info("Starting first rebuild");
-        return nsSummaryTask.reprocess(mockOMMetadataManager);
-      }, executor);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result,
+        "Rebuild should be queued successfully");
 
-      // Wait for first rebuild to start
-      assertTrue(startLatch.await(5, TimeUnit.SECONDS), 
-          "First rebuild should start within timeout");
-      assertTrue(firstRebuildStarted.get(), "First rebuild should have 
started");
-      assertEquals(RebuildState.RUNNING, NSSummaryTask.getRebuildState(),
-          "State should be RUNNING during first rebuild");
-
-      // Attempt second rebuild - should be rejected immediately
-      CompletableFuture<TaskResult> secondRebuild = 
CompletableFuture.supplyAsync(() -> {
-        LOG.info("Attempting second rebuild");
-        TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
-        secondRebuildRejected.set(!result.isTaskSuccess());
-        return result;
-      }, executor);
-
-      // Get second rebuild result quickly (should be immediate rejection)
-      TaskResult secondResult = secondRebuild.get(2, TimeUnit.SECONDS);
-      assertFalse(secondResult.isTaskSuccess(), 
-          "Second rebuild should be rejected");
-      assertTrue(secondRebuildRejected.get(), "Second rebuild should have been 
rejected");
-
-      // Signal first rebuild to complete
-      finishLatch.countDown();
-      TaskResult firstResult = firstRebuild.get(5, TimeUnit.SECONDS);
-      assertTrue(firstResult.isTaskSuccess(), "First rebuild should succeed");
-
-      // Verify final state
-      assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
-          "State should return to IDLE after first rebuild completes");
+    assertTrue(failureLatch.await(10, TimeUnit.SECONDS),
+        "Rebuild should be attempted");
 
-    } finally {
-      finishLatch.countDown(); // Ensure cleanup
-      executor.shutdown();
-      executor.awaitTermination(5, TimeUnit.SECONDS);
-    }
+    // Allow time for state update
+    Thread.sleep(500);
+    assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
+        "State should be FAILED after rebuild failure");
   }
 
   /**
-   * Test that rebuild can be triggered again after failure.
+   * Test rebuild can be triggered again after failure.
    */
   @Test
   void testRebuildAfterFailure() throws Exception {
+    CountDownLatch firstAttempt = new CountDownLatch(1);
+    CountDownLatch secondAttempt = new CountDownLatch(1);
+    AtomicInteger attemptCount = new AtomicInteger(0);
+
+    // Setup mock to fail first time, succeed second time
+    doAnswer(invocation -> {
+      int attempt = attemptCount.incrementAndGet();
+      LOG.info("clearNSSummaryTable attempt #{}", attempt);
+      if (attempt == 1) {
+        firstAttempt.countDown();
+        throw new IOException("First failure");
+      } else {
+        secondAttempt.countDown();
+        return null;
+      }
+    }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
+
     // First rebuild fails
-    doThrow(new IOException("Test 
failure")).when(mockNamespaceSummaryManager).clearNSSummaryTable();
-    
-    TaskResult failedResult = nsSummaryTask.reprocess(mockOMMetadataManager);
-    assertFalse(failedResult.isTaskSuccess(), "First rebuild should fail");
+    ReconTaskController.ReInitializationResult result1 =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result1,
+        "First event should be queued successfully");
+
+    assertTrue(firstAttempt.await(10, TimeUnit.SECONDS), "First rebuild should 
be attempted");
+    Thread.sleep(1000);
     assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
         "State should be FAILED after first rebuild");
 
-    // Second rebuild succeeds
-    // Setup successful rebuild by default - no exception thrown
-    doNothing().when(mockNamespaceSummaryManager).clearNSSummaryTable();
-    
-    TaskResult successResult = nsSummaryTask.reprocess(mockOMMetadataManager);
-    assertTrue(successResult.isTaskSuccess(), "Second rebuild should succeed");
+    // Second rebuild succeeds - wait for retry delay (2 seconds)
+    Thread.sleep(2100); // Wait for RETRY_DELAY_MS (2000ms) + buffer
+
+    ReconTaskController.ReInitializationResult result2 =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result2,
+        "Second event should be queued successfully after retry delay");
+
+    assertTrue(secondAttempt.await(10, TimeUnit.SECONDS), "Second rebuild 
should be attempted");
+    Thread.sleep(1000);
     assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
         "State should be IDLE after successful rebuild");
   }
 
   /**
-   * Test multiple concurrent attempts - only one should succeed, others 
rejected.
+   * Test multiple concurrent queueReInitializationEvent() calls.
+   *
+   * This is the KEY test for production behavior - multiple threads
+   * simultaneously calling queueReInitializationEvent(), which is what
+   * actually happens in production (not direct reprocess() calls).
+   *
+   * <p>Important: The queue-based architecture provides SEQUENTIAL processing,
+   * not event deduplication. Multiple successfully queued events will execute
+   * sequentially (not concurrently). The AtomicReference in NSSummaryTask
+   * prevents concurrent execution within a single reprocess() call.
    */
   @Test
-  @Flaky("HDDS-13573")
+  @SuppressWarnings("methodLength")

Review Comment:
   done.



##########
hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryUnifiedControl.java:
##########
@@ -183,215 +264,300 @@ void testInitialState() {
   }
 
   /**
-   * Test successful single rebuild operation.
+   * Test single successful rebuild via queue.
    */
   @Test
   void testSingleSuccessfulRebuild() throws Exception {
-    // Setup successful rebuild
-    // Setup successful rebuild by default - no exception thrown
-    doNothing().when(mockNamespaceSummaryManager).clearNSSummaryTable();
+    AtomicBoolean rebuildExecuted = new AtomicBoolean(false);
+    CountDownLatch rebuildLatch = new CountDownLatch(1);
 
-    // Execute rebuild
-    TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
+    doAnswer(invocation -> {
+      rebuildExecuted.set(true);
+      rebuildLatch.countDown();
+      return null;
+    }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
 
-    // Verify results
-    assertTrue(result.isTaskSuccess(), "Rebuild should succeed");
-    assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
-        "State should return to IDLE after successful rebuild");
-    
-    // Verify interactions
-    verify(mockNamespaceSummaryManager, times(1)).clearNSSummaryTable();
-  }
+    // Queue rebuild via production API
+    ReconTaskController.ReInitializationResult result =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
 
-  /**
-   * Test rebuild failure sets state to FAILED.
-   */
-  @Test
-  void testRebuildFailure() throws IOException {
-    // Setup failure scenario
-    doThrow(new IOException("Test 
failure")).when(mockNamespaceSummaryManager).clearNSSummaryTable();
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result,
+        "Rebuild should be queued successfully");
 
-    // Execute rebuild
-    TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
+    // Wait for async processing
+    assertTrue(rebuildLatch.await(10, TimeUnit.SECONDS),
+        "Rebuild should execute");
+    assertTrue(rebuildExecuted.get(), "Rebuild should have executed");
 
-    // Verify results
-    assertFalse(result.isTaskSuccess(), "Rebuild should fail");
-    assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
-        "State should be FAILED after rebuild failure");
+    // Allow time for state to return to IDLE
+    Thread.sleep(500);
+    assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
+        "State should return to IDLE after successful rebuild");
   }
 
   /**
-   * Test concurrent rebuild attempts - second call should be rejected.
+   * Test rebuild failure sets proper state.
    */
   @Test
-  void testConcurrentRebuildPrevention() throws Exception {
-    CountDownLatch startLatch = new CountDownLatch(1);
-    CountDownLatch finishLatch = new CountDownLatch(1);
-    AtomicBoolean firstRebuildStarted = new AtomicBoolean(false);
-    AtomicBoolean secondRebuildRejected = new AtomicBoolean(false);
+  void testRebuildFailure() throws Exception {
+    CountDownLatch failureLatch = new CountDownLatch(1);
 
-    // Setup first rebuild to block until we signal
     doAnswer(invocation -> {
-      firstRebuildStarted.set(true);
-      startLatch.countDown();
-      // Wait for test to signal completion
-      boolean awaitSuccess = finishLatch.await(10, TimeUnit.SECONDS);
-      if (!awaitSuccess) {
-        LOG.warn("finishLatch.await() timed out");
-      }
-      return null;
+      failureLatch.countDown();
+      throw new IOException("Test failure");
     }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
 
-    ExecutorService executor = Executors.newFixedThreadPool(2);
+    ReconTaskController.ReInitializationResult result =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
 
-    try {
-      // Start first rebuild asynchronously
-      CompletableFuture<TaskResult> firstRebuild = 
CompletableFuture.supplyAsync(() -> {
-        LOG.info("Starting first rebuild");
-        return nsSummaryTask.reprocess(mockOMMetadataManager);
-      }, executor);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result,
+        "Rebuild should be queued successfully");
 
-      // Wait for first rebuild to start
-      assertTrue(startLatch.await(5, TimeUnit.SECONDS), 
-          "First rebuild should start within timeout");
-      assertTrue(firstRebuildStarted.get(), "First rebuild should have 
started");
-      assertEquals(RebuildState.RUNNING, NSSummaryTask.getRebuildState(),
-          "State should be RUNNING during first rebuild");
-
-      // Attempt second rebuild - should be rejected immediately
-      CompletableFuture<TaskResult> secondRebuild = 
CompletableFuture.supplyAsync(() -> {
-        LOG.info("Attempting second rebuild");
-        TaskResult result = nsSummaryTask.reprocess(mockOMMetadataManager);
-        secondRebuildRejected.set(!result.isTaskSuccess());
-        return result;
-      }, executor);
-
-      // Get second rebuild result quickly (should be immediate rejection)
-      TaskResult secondResult = secondRebuild.get(2, TimeUnit.SECONDS);
-      assertFalse(secondResult.isTaskSuccess(), 
-          "Second rebuild should be rejected");
-      assertTrue(secondRebuildRejected.get(), "Second rebuild should have been 
rejected");
-
-      // Signal first rebuild to complete
-      finishLatch.countDown();
-      TaskResult firstResult = firstRebuild.get(5, TimeUnit.SECONDS);
-      assertTrue(firstResult.isTaskSuccess(), "First rebuild should succeed");
-
-      // Verify final state
-      assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
-          "State should return to IDLE after first rebuild completes");
+    assertTrue(failureLatch.await(10, TimeUnit.SECONDS),
+        "Rebuild should be attempted");
 
-    } finally {
-      finishLatch.countDown(); // Ensure cleanup
-      executor.shutdown();
-      executor.awaitTermination(5, TimeUnit.SECONDS);
-    }
+    // Allow time for state update
+    Thread.sleep(500);
+    assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
+        "State should be FAILED after rebuild failure");
   }
 
   /**
-   * Test that rebuild can be triggered again after failure.
+   * Test rebuild can be triggered again after failure.
    */
   @Test
   void testRebuildAfterFailure() throws Exception {
+    CountDownLatch firstAttempt = new CountDownLatch(1);
+    CountDownLatch secondAttempt = new CountDownLatch(1);
+    AtomicInteger attemptCount = new AtomicInteger(0);
+
+    // Setup mock to fail first time, succeed second time
+    doAnswer(invocation -> {
+      int attempt = attemptCount.incrementAndGet();
+      LOG.info("clearNSSummaryTable attempt #{}", attempt);
+      if (attempt == 1) {
+        firstAttempt.countDown();
+        throw new IOException("First failure");
+      } else {
+        secondAttempt.countDown();
+        return null;
+      }
+    }).when(mockNamespaceSummaryManager).clearNSSummaryTable();
+
     // First rebuild fails
-    doThrow(new IOException("Test 
failure")).when(mockNamespaceSummaryManager).clearNSSummaryTable();
-    
-    TaskResult failedResult = nsSummaryTask.reprocess(mockOMMetadataManager);
-    assertFalse(failedResult.isTaskSuccess(), "First rebuild should fail");
+    ReconTaskController.ReInitializationResult result1 =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result1,
+        "First event should be queued successfully");
+
+    assertTrue(firstAttempt.await(10, TimeUnit.SECONDS), "First rebuild should 
be attempted");
+    Thread.sleep(1000);
     assertEquals(RebuildState.FAILED, NSSummaryTask.getRebuildState(),
         "State should be FAILED after first rebuild");
 
-    // Second rebuild succeeds
-    // Setup successful rebuild by default - no exception thrown
-    doNothing().when(mockNamespaceSummaryManager).clearNSSummaryTable();
-    
-    TaskResult successResult = nsSummaryTask.reprocess(mockOMMetadataManager);
-    assertTrue(successResult.isTaskSuccess(), "Second rebuild should succeed");
+    // Second rebuild succeeds - wait for retry delay (2 seconds)
+    Thread.sleep(2100); // Wait for RETRY_DELAY_MS (2000ms) + buffer
+
+    ReconTaskController.ReInitializationResult result2 =
+        taskController.queueReInitializationEvent(
+            
ReconTaskReInitializationEvent.ReInitializationReason.MANUAL_TRIGGER);
+    assertEquals(ReconTaskController.ReInitializationResult.SUCCESS, result2,
+        "Second event should be queued successfully after retry delay");
+
+    assertTrue(secondAttempt.await(10, TimeUnit.SECONDS), "Second rebuild 
should be attempted");
+    Thread.sleep(1000);
     assertEquals(RebuildState.IDLE, NSSummaryTask.getRebuildState(),
         "State should be IDLE after successful rebuild");
   }
 
   /**
-   * Test multiple concurrent attempts - only one should succeed, others 
rejected.
+   * Test multiple concurrent queueReInitializationEvent() calls.
+   *
+   * This is the KEY test for production behavior - multiple threads
+   * simultaneously calling queueReInitializationEvent(), which is what
+   * actually happens in production (not direct reprocess() calls).
+   *
+   * <p>Important: The queue-based architecture provides SEQUENTIAL processing,
+   * not event deduplication. Multiple successfully queued events will execute
+   * sequentially (not concurrently). The AtomicReference in NSSummaryTask
+   * prevents concurrent execution within a single reprocess() call.
    */
   @Test
-  @Flaky("HDDS-13573")
+  @SuppressWarnings("methodLength")

Review Comment:
   done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to