Apache9 commented on a change in pull request #2570: URL: https://github.com/apache/hbase/pull/2570#discussion_r508960285
########## File path: hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaReplicaLoadBalanceReplicaSimpleChooser.java ########## @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hbase.HConstants.DEFAULT_META_REPLICA_NUM; +import static org.apache.hadoop.hbase.HConstants.META_REPLICAS_NUM; +import static org.apache.hadoop.hbase.client.ConnectionUtils.isEmptyStopRow; +import static org.apache.hadoop.hbase.util.Bytes.BYTES_COMPARATOR; +import static 
org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent; + +/** + * MetaReplicaLoadBalanceReplicaSimpleChooser implements a simple meta replica load balancing + * algorithm. It maintains a stale location cache for each table. Whenever client looks up meta, + * it first check if the row is the stale location cache, if yes, this means the the location from + * meta replica is stale, it will go to the primary meta to look up update-to-date location; + * otherwise, it will randomly pick up a meta replica region for meta lookup. When clients receive + * RegionNotServedException from region servers, it will add these region locations to the stale + * location cache. The stale cache will be cleaned up periodically by a chore. + */ + +/** + * StaleLocationCacheEntry is the entry when a stale location is reported by an client. + */ +class StaleLocationCacheEntry { Review comment: Give it a separate file or make it an inner class. Having two classes in a single file is not a good practice. ########## File path: hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaReplicaLoadBalanceReplicaChooser.java ########## @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * There are two modes with meta replica support. + * HighAvailable - Client sends requests to the primary meta region first, within a + * configured amount of time, if there is no response coming back, + * client sends requests to all replica regions and takes the first + * response. + * + * LoadBalance - Client sends requests to meta replica regions in a round-robin mode, + * if results from replica regions are stale, next time, client sends requests for + * these stable locations to the primary meta region. In this mode, scan + * requests are load balanced across all replica regions. + */ +enum MetaReplicaMode { + None, + HighAvailable, Review comment: But wouldn't it be better to put this enum in a separate file? ########## File path: hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaReplicaLoadBalanceReplicaSimpleChooser.java ########## @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hbase.HConstants.DEFAULT_META_REPLICA_NUM; +import static org.apache.hadoop.hbase.HConstants.META_REPLICAS_NUM; +import static org.apache.hadoop.hbase.client.ConnectionUtils.isEmptyStopRow; +import static org.apache.hadoop.hbase.util.Bytes.BYTES_COMPARATOR; +import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent; + +/** + * MetaReplicaLoadBalanceReplicaSimpleChooser implements a simple meta replica load balancing + * algorithm. It maintains a stale location cache for each table. Whenever client looks up meta, + * it first check if the row is the stale location cache, if yes, this means the the location from + * meta replica is stale, it will go to the primary meta to look up update-to-date location; + * otherwise, it will randomly pick up a meta replica region for meta lookup. When clients receive + * RegionNotServedException from region servers, it will add these region locations to the stale + * location cache. The stale cache will be cleaned up periodically by a chore. + */ + +/** + * StaleLocationCacheEntry is the entry when a stale location is reported by an client. 
+ */ +class StaleLocationCacheEntry { + // meta replica id where + private int metaReplicaId; + + // timestamp in milliseconds + private long timestamp; + + private byte[] endKey; + + StaleLocationCacheEntry(final int metaReplicaId, final byte[] endKey) { + this.metaReplicaId = metaReplicaId; + this.endKey = endKey; + timestamp = System.currentTimeMillis(); + } + + public byte[] getEndKey() { + return this.endKey; + } + + public int getMetaReplicaId() { + return this.metaReplicaId; + } + public long getTimestamp() { + return this.timestamp; + } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("endKey", endKey) + .append("metaReplicaId", metaReplicaId) + .append("timestamp", timestamp) + .toString(); + } +} + +/** + * A simple implementation of MetaReplicaLoadBalanceReplicaChooser. + * + * It follows a simple algorithm to choose a meta replica to go: + * + * 1. If there is no stale location entry for rows it looks up, it will randomly + * pick a meta replica region to do lookup. + * 2. If the location from meta replica region is stale, client gets RegionNotServedException + * from region server, in this case, it will create StaleLocationCacheEntry in + * MetaReplicaLoadBalanceReplicaSimpleChooser. + * 3. When client tries to do meta lookup, it checks StaleLocationCache first for rows it tries to + * lookup, if entry exists, it will go with primary meta region to do lookup; otherwise, it + * will follow step 1. + * 4. A chore will periodically run to clean up cache entries in the StaleLocationCache. + */ +class MetaReplicaLoadBalanceReplicaSimpleChooser implements MetaReplicaLoadBalanceReplicaChooser { Review comment: Maybe first we just need to cache the replica id which contains the stale data? For example, we get location from replica 1 and it is stale, but it does not mean that the location from replica 2 will be stale? If we use the algorithm here, we will go to replica 0(the primary replica) instead of replica 1. 
And considering the whole client locator architecture, where we have a meta cache in the upper layer, I would suggest that we only provide two selection modes here: the first is random (no primary), and if you get stale data, then use the second one, which only goes to the primary. Maybe we could add a parameter on the choose method to indicate whether we only want to go to the primary replica. ########## File path: hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaReplicaLoadBalanceReplicaChooser.java ########## @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * There are two modes with meta replica support. + * HighAvailable - Client sends requests to the primary meta region first, within a + * configured amount of time, if there is no response coming back, + * client sends requests to all replica regions and takes the first + * response. 
+ * + * LoadBalance - Client sends requests to meta replica regions in a round-robin mode, + * if results from replica regions are stale, next time, client sends requests for + * these stable locations to the primary meta region. In this mode, scan + * requests are load balanced across all replica regions. + */ +enum MetaReplicaMode { + None, + HighAvailable, Review comment: I do not think this is suitable for user tables yet. At least for the LoadBalance mode, where we explicitly mention the 'stale' data will impact our policy. For the meta table I think whether the data is 'stale' can be detected on our own, but for user tables, only users know how to determine whether the data is stale, which means the interface needs to be able to get information from users on whether the previous data was stale and whether they only want to go to the primary replica this time. This requires a good interface design as it will be IA.Public, so I think keeping it as meta only and IA.Private is more suitable for now. ########## File path: hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java ########## @@ -130,8 +133,12 @@ public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, Stri SocketAddress localAddress, User user) { this.conf = conf; this.user = user; - if (user.isLoginFromKeytab()) { - spawnRenewalChore(user.getUGI()); + + if (user.isLoginFromKeytab() || isMetaReplicaLBMode()) { Review comment: Maybe we could just create the chore service on-demand? I do not think it is a good idea to add new checks in the future if more features need to use ChoreService... ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org