bbeaudreault commented on code in PR #5534:
URL: https://github.com/apache/hbase/pull/5534#discussion_r1409521670


##########
hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java:
##########
@@ -133,39 +182,66 @@ protected Flow executeFromState(MasterProcedureEnv env, 
ReopenTableRegionsState
             regionNode.unlock();
           }
           addChildProcedure(proc);
+          regionsReopened++;
         }
         
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
         return Flow.HAS_MORE_STATE;
       case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
         regions = 
regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
           .filter(l -> l != null).collect(Collectors.toList());
-        if (regions.isEmpty()) {
-          return Flow.NO_MORE_STATE;
+        // we need to create a set of region names because the HRegionLocation 
hashcode is only
+        // based
+        // on the server name
+        Set<byte[]> currentRegionBatchNames = currentRegionBatch.stream()
+          .map(r -> r.getRegion().getRegionName()).collect(Collectors.toSet());
+        currentRegionBatch = regions.stream()
+          .filter(r -> 
currentRegionBatchNames.contains(r.getRegion().getRegionName()))
+          .collect(Collectors.toList());

Review Comment:
   Are you sure we need to do this? What if we did something like this:
   
   ```java
     case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
       // update region lists based on what's been reopened
       regions = filterReopened(regions);
       currentRegionBatch = filterReopened(currentRegionBatch);
   
       // existing batch didn't fully reopen, so try to resolve that first.
       // since this is a retry, don't do the batch backoff
       if (!currentRegionBatch.isEmpty()) {
         return reopenIfSchedulable(currentRegionBatch, false);
       }
   
      if (regions.isEmpty()) {
        return Flow.NO_MORE_STATE;
      }
   
       // no batching or batch is finished, schedule more from main regions
       return reopenIfSchedulable(regions, true);
     default:
       throw new UnsupportedOperationException("unhandled state=" + state);
   }
   
   ...
   
   /**
    * Passes every location in {@code regionsToCheck} through
    * {@code RegionStates#checkReopened} and collects the non-null results into a new list.
    * <p>
    * NOTE(review): presumably {@code checkReopened} returns {@code null} for a region that no
    * longer needs to be reopened, so the result is the set of regions still pending -- confirm
    * against RegionStates.
    * <p>
    * NOTE(review): {@code env} is not a parameter of this sketch; it has to be passed in (or
    * otherwise be in scope) for this to compile.
    *
    * @param regionsToCheck locations to re-evaluate
    * @return the non-null values produced by {@code checkReopened}, in encounter order
    */
   private List<HRegionLocation> filterReopened(List<HRegionLocation> 
regionsToCheck) {
     return 
regionsToCheck.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
             // drop entries for which checkReopened produced no location
             .filter(l -> l != null).collect(Collectors.toList());
   }
   
   private void reopenIfSchedulable(List<HRegionLocations> regionsToReopen, 
boolean shouldBatchBackoff) throws ProcedureSuspendedException {
     if (regionsToReopen.stream().anyMatch(loc -> canSchedule(env, loc))) {
       retryCounter = null;
       
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
       if (shouldBatchBackoff && reopenBatchBackoffMillis > 0) {
         reopenBatchSize = Math.min(reopenBatchSizeMax, 2 * reopenBatchSize);
         setBackoffStateAndSuspend(reopenBatchBackoffMillis);
       } else {
         return Flow.HAS_MORE_STATE;
       }
     }
   
     // We can not schedule TRSP for all the regions need to reopen, wait for a 
while and retry
     // again.
     if (retryCounter == null) {
       retryCounter = 
ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
     }
     long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
     LOG.info(
       "There are still {} region(s) which need to be reopened for table {}. {} 
are in "
         + "OPENING state, suspend {}secs and try again later",
       regions.size(), tableName, regionsToCheck.size(), backoffMillis / 1000);
     setBackoffStateAndSuspend(backoffMillis);
   }
   ```
   
   It feels cleaner to read and understand what's happening. As mentioned in 
another comment, I'd recommend updating REOPEN_REGIONS to:
   
   ```java
   // if we didn't finish reopening the last batch yet, let's keep trying until 
we do.
   // at that point, the batch will be empty and we can generate a new batch
   if (currentRegionBatch.isEmpty()) {
     currentRegionBatch = 
regions.stream().limit(reopenBatchSize).collect(Collectors.toList());
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to