Apache9 commented on code in PR #5037:
URL: https://github.com/apache/hbase/pull/5037#discussion_r1110921422


##########
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java:
##########
@@ -217,6 +217,223 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuture<RegionLocations
     }
   }
 
+  /**
+   * Wrapper around ConcurrentSkipListMap ensuring proper access to cached 
items. Updates are
+   * synchronized, but reads are not.
+   */
+  private static final class RegionLocationCache {
+
+    private final ConcurrentNavigableMap<byte[], RegionLocations> cache =
+      new ConcurrentSkipListMap<>(BYTES_COMPARATOR);
+
+    /**
+     * Add the given locations to the cache, merging with existing if 
necessary. Also cleans out any
+     * previously cached locations which may have been superseded by this one 
(i.e. in case of
+     * merged regions). See {@link #cleanOverlappingRegions(RegionLocations)}
+     * @param locs the locations to cache
+     * @return the final location (possibly merged) that was added to the cache
+     */
+    public synchronized RegionLocations add(RegionLocations locs) {
+      byte[] startKey = locs.getRegionLocation().getRegion().getStartKey();
+      RegionLocations oldLocs = cache.putIfAbsent(startKey, locs);
+      if (oldLocs == null) {
+        cleanOverlappingRegions(locs);
+        return locs;
+      }
+
+      // check whether the regions are the same, this usually happens when 
table is split/merged,
+      // or deleted and recreated again.
+      RegionInfo region = locs.getRegionLocation().getRegion();
+      RegionInfo oldRegion = oldLocs.getRegionLocation().getRegion();
+      if (region.getEncodedName().equals(oldRegion.getEncodedName())) {
+        RegionLocations mergedLocs = oldLocs.mergeLocations(locs);
+        if (isEqual(mergedLocs, oldLocs)) {
+          // the merged one is the same with the old one, give up
+          LOG.trace("Will not add {} to cache because the old value {} "
+            + " is newer than us or has the same server name."
+            + " Maybe it is updated before we replace it", locs, oldLocs);
+          return oldLocs;
+        }
+        locs = mergedLocs;
+      } else {
+        // the region is different, here we trust the one we fetched. This 
maybe wrong but finally
+        // the upper layer can detect this and trigger removal of the wrong 
locations
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("The newly fetch region {} is different from the old one 
{} for row '{}',"
+            + " try replaying the old one...", region, oldRegion, 
Bytes.toStringBinary(startKey));
+        }
+      }
+
+      cache.put(startKey, locs);
+      cleanOverlappingRegions(locs);
+      return locs;
+    }
+
+    /**
+     * When caching a location, the region may have been the result of a 
merge. Check to see if the
+     * region's boundaries overlap any other cached locations. Those would 
have been merge parents
+     * which no longer exist. We need to proactively clear them out to avoid a 
case where a merged
+     * region which receives no requests never gets cleared. This causes 
requests to other merged
+     * regions after it to see the wrong cached location. See HBASE-27650
+     * @param locations the new location that was just cached
+     */
+    private void cleanOverlappingRegions(RegionLocations locations) {
+      RegionInfo region = locations.getRegionLocation().getRegion();
+
+      boolean isLast = isEmptyStopRow(region.getEndKey());
+
+      while (true) {
+        Map.Entry<byte[], RegionLocations> overlap =
+          isLast ? cache.lastEntry() : cache.lowerEntry(region.getEndKey());
+        if (
+          overlap == null || overlap.getValue() == locations
+            || Bytes.equals(overlap.getKey(), region.getStartKey())
+        ) {
+          break;
+        }
+
+        if (LOG.isInfoEnabled()) {
+          LOG.info(
+            "Removing cached location {} (endKey={}) because it overlaps with "
+              + "new location {} (endKey={})",
+            overlap.getValue(),
+            
Bytes.toStringBinary(overlap.getValue().getRegionLocation().getRegion().getEndKey()),
+            locations, 
Bytes.toStringBinary(locations.getRegionLocation().getRegion().getEndKey()));
+        }
+
+        cache.remove(overlap.getKey());
+      }
+    }
+
+    private boolean isEqual(RegionLocations locs1, RegionLocations locs2) {
+      HRegionLocation[] locArr1 = locs1.getRegionLocations();
+      HRegionLocation[] locArr2 = locs2.getRegionLocations();
+      if (locArr1.length != locArr2.length) {
+        return false;
+      }
+      for (int i = 0; i < locArr1.length; i++) {
+        // do not need to compare region info
+        HRegionLocation loc1 = locArr1[i];
+        HRegionLocation loc2 = locArr2[i];
+        if (loc1 == null) {
+          if (loc2 != null) {
+            return false;
+          }
+        } else {
+          if (loc2 == null) {
+            return false;
+          }
+          if (loc1.getSeqNum() != loc2.getSeqNum()) {
+            return false;
+          }
+          if (!Objects.equal(loc1.getServerName(), loc2.getServerName())) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    /** Returns all cached RegionLocations */
+    public Collection<RegionLocations> getAll() {
+      return cache.values();

Review Comment:
   Better wrap it with Collections.unmodifiableCollection, so callers cannot mutate the cache's internal values view.



##########
hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java:
##########
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.client;
 
+import static org.junit.Assert.*;

Review Comment:
   Avoid star imports.



##########
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java:
##########
@@ -217,6 +217,223 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuture<RegionLocations
     }
   }
 
+  /**
+   * Wrapper around ConcurrentSkipListMap ensuring proper access to cached 
items. Updates are
+   * synchronized, but reads are not.
+   */
+  private static final class RegionLocationCache {

Review Comment:
   Better move this to a separate file, it is big enough :)



##########
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java:
##########
@@ -450,13 +601,11 @@ private RegionLocations locateRowInCache(TableCache 
tableCache, TableName tableN
   private RegionLocations locateRowBeforeInCache(TableCache tableCache, 
TableName tableName,
     byte[] row, int replicaId) {
     boolean isEmptyStopRow = isEmptyStopRow(row);
-    Map.Entry<byte[], RegionLocations> entry =
-      isEmptyStopRow ? tableCache.cache.lastEntry() : 
tableCache.cache.lowerEntry(row);
-    if (entry == null) {
+    RegionLocations locs = 
tableCache.regionLocationCache.findForBeforeRow(row);

Review Comment:
   Do we still need the check below for whether the row is contained in the 
region? I suppose this should already be done in findForBeforeRow?



##########
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java:
##########
@@ -217,6 +217,223 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuture<RegionLocations
     }
   }
 
+  /**
+   * Wrapper around ConcurrentSkipListMap ensuring proper access to cached 
items. Updates are
+   * synchronized, but reads are not.
+   */
+  private static final class RegionLocationCache {
+
+    private final ConcurrentNavigableMap<byte[], RegionLocations> cache =
+      new ConcurrentSkipListMap<>(BYTES_COMPARATOR);
+
+    /**
+     * Add the given locations to the cache, merging with existing if 
necessary. Also cleans out any
+     * previously cached locations which may have been superseded by this one 
(i.e. in case of
+     * merged regions). See {@link #cleanOverlappingRegions(RegionLocations)}
+     * @param locs the locations to cache
+     * @return the final location (possibly merged) that was added to the cache
+     */
+    public synchronized RegionLocations add(RegionLocations locs) {
+      byte[] startKey = locs.getRegionLocation().getRegion().getStartKey();
+      RegionLocations oldLocs = cache.putIfAbsent(startKey, locs);
+      if (oldLocs == null) {
+        cleanOverlappingRegions(locs);
+        return locs;
+      }
+
+      // check whether the regions are the same, this usually happens when 
table is split/merged,
+      // or deleted and recreated again.
+      RegionInfo region = locs.getRegionLocation().getRegion();
+      RegionInfo oldRegion = oldLocs.getRegionLocation().getRegion();
+      if (region.getEncodedName().equals(oldRegion.getEncodedName())) {
+        RegionLocations mergedLocs = oldLocs.mergeLocations(locs);
+        if (isEqual(mergedLocs, oldLocs)) {
+          // the merged one is the same with the old one, give up
+          LOG.trace("Will not add {} to cache because the old value {} "
+            + " is newer than us or has the same server name."
+            + " Maybe it is updated before we replace it", locs, oldLocs);
+          return oldLocs;
+        }
+        locs = mergedLocs;
+      } else {
+        // the region is different, here we trust the one we fetched. This 
maybe wrong but finally
+        // the upper layer can detect this and trigger removal of the wrong 
locations
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("The newly fetch region {} is different from the old one 
{} for row '{}',"
+            + " try replaying the old one...", region, oldRegion, 
Bytes.toStringBinary(startKey));
+        }
+      }
+
+      cache.put(startKey, locs);
+      cleanOverlappingRegions(locs);
+      return locs;
+    }
+
+    /**
+     * When caching a location, the region may have been the result of a 
merge. Check to see if the
+     * region's boundaries overlap any other cached locations. Those would 
have been merge parents
+     * which no longer exist. We need to proactively clear them out to avoid a 
case where a merged
+     * region which receives no requests never gets cleared. This causes 
requests to other merged
+     * regions after it to see the wrong cached location. See HBASE-27650
+     * @param locations the new location that was just cached
+     */
+    private void cleanOverlappingRegions(RegionLocations locations) {
+      RegionInfo region = locations.getRegionLocation().getRegion();
+
+      boolean isLast = isEmptyStopRow(region.getEndKey());
+
+      while (true) {
+        Map.Entry<byte[], RegionLocations> overlap =
+          isLast ? cache.lastEntry() : cache.lowerEntry(region.getEndKey());
+        if (
+          overlap == null || overlap.getValue() == locations
+            || Bytes.equals(overlap.getKey(), region.getStartKey())
+        ) {
+          break;
+        }
+
+        if (LOG.isInfoEnabled()) {
+          LOG.info(
+            "Removing cached location {} (endKey={}) because it overlaps with "
+              + "new location {} (endKey={})",
+            overlap.getValue(),
+            
Bytes.toStringBinary(overlap.getValue().getRegionLocation().getRegion().getEndKey()),
+            locations, 
Bytes.toStringBinary(locations.getRegionLocation().getRegion().getEndKey()));
+        }
+
+        cache.remove(overlap.getKey());
+      }
+    }
+
+    private boolean isEqual(RegionLocations locs1, RegionLocations locs2) {
+      HRegionLocation[] locArr1 = locs1.getRegionLocations();
+      HRegionLocation[] locArr2 = locs2.getRegionLocations();
+      if (locArr1.length != locArr2.length) {
+        return false;
+      }
+      for (int i = 0; i < locArr1.length; i++) {
+        // do not need to compare region info
+        HRegionLocation loc1 = locArr1[i];
+        HRegionLocation loc2 = locArr2[i];
+        if (loc1 == null) {
+          if (loc2 != null) {
+            return false;
+          }
+        } else {
+          if (loc2 == null) {
+            return false;
+          }
+          if (loc1.getSeqNum() != loc2.getSeqNum()) {
+            return false;
+          }
+          if (!Objects.equal(loc1.getServerName(), loc2.getServerName())) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    /** Returns all cached RegionLocations */
+    public Collection<RegionLocations> getAll() {
+      return cache.values();
+    }
+
+    /**
+     * Gets the RegionLocations for a given region's startKey. This is a 
direct lookup, if the key
+     * does not exist in the cache it will return null.
+     * @param startKey region start key to directly look up
+     */
+    public RegionLocations get(byte[] startKey) {
+      return cache.get(startKey);
+    }
+
+    /**
+     * Finds the RegionLocations for the region with the greatest startKey 
less than or equal to the
+     * given row
+     * @param row row to find locations
+     */
+    public RegionLocations findForRow(byte[] row) {
+      Map.Entry<byte[], RegionLocations> entry = cache.floorEntry(row);
+      if (entry == null) {
+        return null;
+      }
+      return entry.getValue();
+    }
+
+    /**
+     * Finds the RegionLocations for the region with the greatest startKey 
strictly less than the
+     * given row
+     * @param row row to find locations
+     */
+    public RegionLocations findForBeforeRow(byte[] row) {
+      boolean isEmptyStopRow = isEmptyStopRow(row);
+      Map.Entry<byte[], RegionLocations> entry =
+        isEmptyStopRow ? cache.lastEntry() : cache.lowerEntry(row);
+      if (entry == null) {
+        return null;
+      }
+      return entry.getValue();
+    }
+
+    /**
+     * Removes the location from the cache if it exists and can be removed. 
Once a change is deemed
+     * possible, calls beforeUpdate callback prior to making a change. Calls 
afterUpdate callback
+     * after making a change.
+     */
+    public synchronized void remove(HRegionLocation loc, Runnable beforeUpdate,

Review Comment:
   We need to be careful that the beforeUpdate and afterUpdate callbacks do not 
hold other locks, otherwise it may introduce a deadlock



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to