[GitHub] [hbase] saintstack commented on a change in pull request #2584: HBASE-25126 Add load balance logic in hbase-client to distribute read…

2020-11-03 Thread GitBox


saintstack commented on a change in pull request #2584:
URL: https://github.com/apache/hbase/pull/2584#discussion_r517036330



##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -433,9 +474,24 @@ private void locateInMeta(TableName tableName, 
LocateRequest req) {
 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey, 
true)
   
.addFamily(HConstants.CATALOG_FAMILY).setReversed(true).setCaching(locatePrefetchLimit)
   .setReadType(ReadType.PREAD);
-if (useMetaReplicas) {
-  scan.setConsistency(Consistency.TIMELINE);
+
+switch (this.metaReplicaMode) {
+  case LoadBalance:
+int metaReplicaId = this.metaReplicaSelector.select(tableName, 
req.row, req.locateType);
+if (metaReplicaId != RegionInfo.DEFAULT_REPLICA_ID) {
+  // If the selector gives a non-primary meta replica region, then go 
with it.
+  // Otherwise, just go to primary in non-hedgedRead mode.
+  scan.setConsistency(Consistency.TIMELINE);
+  scan.setReplicaId(metaReplicaId);
+}
+break;
+  case HedgedRead:
+scan.setConsistency(Consistency.TIMELINE);
+break;
+  default:
+// do nothing

Review comment:
   Did you file one @huaxiangsun ?

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaMode.java
##
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * There are two modes with catalog replica support. 
+ *
+ * 
+ *   HEDGED_READ  - Client sends requests to the primary region first, 
within a
+ * configured amount of time, if there is no response coming 
back,
+ * client sends requests to all replica regions and takes the 
first
+ * response. 
+ *
+ *   LOAD_BALANCE - Client sends requests to replica regions in a 
round-robin mode,
+ * if results from replica regions are stale, next time, 
client sends requests for
+ * these stale locations to the primary region. In this mode, 
scan
+ * requests are load balanced across all replica regions.
+ * 
+ */
+@InterfaceAudience.Private
+enum CatalogReplicaMode {
+  NONE {
+@Override
+public String toString() {
+  return "None";
+}
+  },
+  HEDGED_READ {
+@Override
+public String toString() {
+  return "HedgedRead";
+}
+  },
+  LOAD_BALANCE {
+@Override
+public String toString() {
+  return "LoadBalance";
+}
+  };
+
+  public static CatalogReplicaMode fromString(final String value) {
+for(CatalogReplicaMode mode : values()) {
+  if (mode.toString().equalsIgnoreCase(value)) {

Review comment:
   Good that you ignore case.

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java
##
@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.apache.hadoop.hbase.client.ConnectionUtils.isEmptyStopRow;
+import static org.apache.hadoop.hbase.util.Bytes.BYTES_COMPARATOR;
+import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent;
+import java.util.Iterator;
+import java.util.Map;
+import 

[GitHub] [hbase] saintstack commented on a change in pull request #2584: HBASE-25126 Add load balance logic in hbase-client to distribute read…

2020-10-29 Thread GitBox


saintstack commented on a change in pull request #2584:
URL: https://github.com/apache/hbase/pull/2584#discussion_r514632131



##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -196,8 +202,43 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuture {
+int numOfReplicas = 1;
+try {
+  RegionLocations metaLocations = 
conn.registry.getMetaRegionLocations().get(
+GET_META_LOCATIONS_TIMEOUT, TimeUnit.MILLISECONDS);
+  numOfReplicas = metaLocations.size();
+} catch (Exception e) {
+  LOG.error("Failed to get table {}'s region replication, ", 
META_TABLE_NAME, e);
+}
+return numOfReplicas;
+  });
+break;
+  case None:
+// If user does not configure LOCATOR_META_REPLICAS_MODE, let's check 
the legacy config.
+if (this.metaReplicaMode == CatalogReplicaMode.None) {
+  boolean useMetaReplicas = 
conn.getConfiguration().getBoolean(USE_META_REPLICAS,
+DEFAULT_USE_META_REPLICAS);
+  if (useMetaReplicas) {
+this.metaReplicaMode = CatalogReplicaMode.HedgedRead;
+  }
+}
+break;
+  default:
+// Doing nothing

Review comment:
   I thought getting here would be an exception... sounds like it is not.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hbase] saintstack commented on a change in pull request #2584: HBASE-25126 Add load balance logic in hbase-client to distribute read…

2020-10-29 Thread GitBox


saintstack commented on a change in pull request #2584:
URL: https://github.com/apache/hbase/pull/2584#discussion_r514543049



##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java
##
@@ -182,8 +183,20 @@ public void newDead(ServerName sn) {
   }
 
   private void spawnRenewalChore(final UserGroupInformation user) {
-authService = new ChoreService("Relogin service");
-authService.scheduleChore(AuthUtil.getAuthRenewalChore(user));
+ChoreService service = getChoreService();
+service.scheduleChore(AuthUtil.getAuthRenewalChore(user));
+  }
+
+  /**
+   * If choreService has not been created yet, create the ChoreService.
+   * It is not thread safe.
+   * @return ChoreService
+   */
+  ChoreService getChoreService() {

Review comment:
   Do we need this?
   
   It is called from the constructor only. If so, just create choreservice in 
the constructor.
   
   If getChoreService can be called from elsewhere, then does this need to be 
synchronized so we don't make multiple chore service instances?

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -196,8 +202,43 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuture {
+int numOfReplicas = 1;
+try {
+  RegionLocations metaLocations = 
conn.registry.getMetaRegionLocations().get(
+GET_META_LOCATIONS_TIMEOUT, TimeUnit.MILLISECONDS);
+  numOfReplicas = metaLocations.size();
+} catch (Exception e) {
+  LOG.error("Failed to get table {}'s region replication, ", 
META_TABLE_NAME, e);
+}
+return numOfReplicas;
+  });
+break;
+  case None:
+// If user does not configure LOCATOR_META_REPLICAS_MODE, let's check 
the legacy config.
+if (this.metaReplicaMode == CatalogReplicaMode.None) {
+  boolean useMetaReplicas = 
conn.getConfiguration().getBoolean(USE_META_REPLICAS,
+DEFAULT_USE_META_REPLICAS);
+  if (useMetaReplicas) {
+this.metaReplicaMode = CatalogReplicaMode.HedgedRead;

Review comment:
   None is not the same as hedged read, right?
   
   If user configures hedged read as metaReplicaMode, they fall through to 
default which 'does nothing'. That don't seem right. Shouldn't it have a case 
here?

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionLocator.java
##
@@ -37,6 +37,12 @@
  */
 @InterfaceAudience.Public
 public interface RegionLocator extends Closeable {
+
+  String LOCATOR_META_REPLICAS_MODE = "hbase.locator.meta.replicas.mode";

Review comment:
   Needs javadoc to at least point at what these are about.

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -433,9 +474,24 @@ private void locateInMeta(TableName tableName, 
LocateRequest req) {
 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey, 
true)
   
.addFamily(HConstants.CATALOG_FAMILY).setReversed(true).setCaching(locatePrefetchLimit)
   .setReadType(ReadType.PREAD);
-if (useMetaReplicas) {
-  scan.setConsistency(Consistency.TIMELINE);
+
+switch (this.metaReplicaMode) {
+  case LoadBalance:
+int metaReplicaId = this.metaReplicaSelector.select(tableName, 
req.row, req.locateType);
+if (metaReplicaId != RegionInfo.DEFAULT_REPLICA_ID) {
+  // If the selector gives a non-primary meta replica region, then go 
with it.
+  // Otherwise, just go to primary in non-hedgedRead mode.
+  scan.setConsistency(Consistency.TIMELINE);
+  scan.setReplicaId(metaReplicaId);
+}
+break;
+  case HedgedRead:
+scan.setConsistency(Consistency.TIMELINE);
+break;
+  default:
+// do nothing

Review comment:
   Yeah, said this before but in follow-on, would be good to shove all this 
stuff into a CatalogReplicaMode class. Internally this class would figure which 
policy to run. It would have a method that took a Scan that allowed decorating 
the Scan w/ whatever the mode needed to implement its policy. Later.

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -577,6 +635,15 @@ private void removeLocationFromCache(HRegionLocation loc) {
   if (!canUpdateOnError(loc, oldLoc)) {
 return;
   }
+  // Tell metaReplicaSelector that the location is stale. It will create a 
stale entry
+  // with timestamp internally. Next time the client looks up the same 
location,
+  // it will pick a different meta replica region. For the current 
implementation,
+  // the metaReplicaId is not used, so the primary one is passed in.
+  if (this.metaReplicaMode == CatalogReplicaMode.LoadBalance) {
+// TODO: pass 

[GitHub] [hbase] saintstack commented on a change in pull request #2584: HBASE-25126 Add load balance logic in hbase-client to distribute read…

2020-10-27 Thread GitBox


saintstack commented on a change in pull request #2584:
URL: https://github.com/apache/hbase/pull/2584#discussion_r512903424



##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -89,7 +90,10 @@
 
   private final int locatePrefetchLimit;
 
-  private final boolean useMetaReplicas;
+  // When useMetaReplicas is true, the mode tells if HedgedRead, LoadBalance 
mode is supported.

Review comment:
   Comment is right? There is no useMetaReplicas here anymore.

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java
##
@@ -196,8 +200,33 @@ private boolean tryComplete(LocateRequest req, 
CompletableFuturehttp://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Factory to create a {@link CatalogReplicaLoadBalanceReplicaSelector}
+ */
+@InterfaceAudience.Private
+public final class CatalogReplicaLoadBalanceReplicaSelectorFactory {

Review comment:
   Yeah, don't need second 'Replica' in the name.

##
File path: 
hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceReplicaSimpleSelector.java
##
@@ -0,0 +1,273 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.apache.hadoop.hbase.client.ConnectionUtils.isEmptyStopRow;
+import static org.apache.hadoop.hbase.util.Bytes.BYTES_COMPARATOR;
+import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ConcurrentNavigableMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * CatalogReplicaLoadBalanceReplicaSimpleSelector implements a simple catalog 
replica load balancing
+ * algorithm. It maintains a stale location cache for each table. Whenever 
client looks up location,
+ * it first check if the row is the stale location cache. If yes, the location 
from
+ * catalog replica is stale, it will go to the primary region to look up 
update-to-date location;
+ * otherwise, it will randomly pick up a replica region for lookup. When 
clients receive
+ * RegionNotServedException from region servers, it will add these region 
locations to the stale
+ * location cache. The stale cache will be cleaned up periodically by a chore.
+ *
+ * It follows a simple algorithm to choose a replica to go:
+ *
+ *  1. If there is no stale location entry for rows it looks up, it will 
randomly
+ * pick a replica region to do lookup.
+ *  2. If the location from the replica region is stale, client gets 
RegionNotServedException
+ * from region server, in this case, it will create 
StaleLocationCacheEntry in
+ * CatalogReplicaLoadBalanceReplicaSimpleSelector.
+ *  3. When client tries to do location lookup, it checks StaleLocationCache 
first for rows it
+ * tries to lookup, if entry exists, it will go with primary meta region 
to do lookup;
+ * otherwise, it will follow step 1.
+ *  4. A chore will periodically run to clean up cache entries in the 
StaleLocationCache.
+ */
+class