saintstack commented on a change in pull request #3575:
URL: https://github.com/apache/hbase/pull/3575#discussion_r686471173
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java
##########
@@ -42,13 +42,13 @@
*/
int pickRandomRegion(BalancerClusterState cluster, int server, double
chanceOfNoSwap) {
// Check to see if this is just a move.
- if (cluster.regionsPerServer[server].length == 0
+ if (cluster.regionsPerServer.get(server).size() == 0
Review comment:
s/isEmpty()/size() == 0/ ... its faster.
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -44,11 +46,12 @@ final void prepare(BalancerClusterState cluster) {
protected final long getMaxCost(BalancerClusterState cluster) {
// max cost is the case where every region replica is hosted together
regardless of host
- int[] primariesOfRegions = new int[cluster.numRegions];
- System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions,
0,
- cluster.regions.length);
-
- Arrays.sort(primariesOfRegions);
+ HashMap<Integer, ArrayList<Integer>> primariesOfRegions =
+ new HashMap<Integer, ArrayList<Integer>>(cluster.numRegions);
Review comment:
We don't do this double allocation on a single line... we give each
allocation and assign its own line... easier to read.
Left hand size should be Map rather than HashMap? Ditto for List and
ArrayLIst.
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -80,22 +83,16 @@ protected double cost() {
* @param primariesOfRegions a sorted array of primary regions ids for the
regions hosted
* @return a sum of numReplicas-1 squared for each primary region in the
group.
*/
- protected final long costPerGroup(int[] primariesOfRegions) {
+ protected final long costPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegions) {
Review comment:
s/Map/HashMap/ and s/List/ArrayList/ ?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
* @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
* @return a regionIndex for the selected primary or -1 if there is no
co-locating
*/
- int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[]
regionsPerGroup,
- int[] regionIndexToPrimaryIndex) {
- int currentPrimary = -1;
- int currentPrimaryIndex = -1;
- int selectedPrimaryIndex = -1;
+ int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegionsPerGroup,
Review comment:
s/List/ArrayList/ and s/Map/HashMap/?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaHostCostFunction.java
##########
@@ -32,7 +35,7 @@
"hbase.master.balancer.stochastic.regionReplicaHostCostKey";
private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
- private int[][] primariesOfRegionsPerGroup;
+ private ArrayList<HashMap<Integer, ArrayList<Integer>>>
primariesOfRegionsPerGroup;
Review comment:
s/Map/HashMap/
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
##########
@@ -128,11 +128,11 @@ public void postMasterStartupInitialize() {
protected final boolean idleRegionServerExist(BalancerClusterState c){
boolean isServerExistsWithMoreRegions = false;
boolean isServerExistsWithZeroRegions = false;
- for (int[] serverList: c.regionsPerServer){
- if (serverList.length > 1) {
+ for (ArrayList<Integer> serverList: c.regionsPerServer){
Review comment:
s/List/ArrayList/ ?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaHostCostFunction.java
##########
@@ -32,7 +35,7 @@
"hbase.master.balancer.stochastic.regionReplicaHostCostKey";
private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
- private int[][] primariesOfRegionsPerGroup;
+ private ArrayList<HashMap<Integer, ArrayList<Integer>>>
primariesOfRegionsPerGroup;
Review comment:
What is the advantage of List of int [] ?
s/List/ArrayList/ ?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
* @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
* @return a regionIndex for the selected primary or -1 if there is no
co-locating
*/
- int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[]
regionsPerGroup,
- int[] regionIndexToPrimaryIndex) {
- int currentPrimary = -1;
- int currentPrimaryIndex = -1;
- int selectedPrimaryIndex = -1;
+ int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegionsPerGroup,
+ Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
double currentLargestRandom = -1;
- // primariesOfRegionsPerGroup is a sorted array. Since it contains the
primary region
- // ids for the regions hosted in server, a consecutive repetition means
that replicas
- // are co-hosted
- for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
- int primary = j < primariesOfRegionsPerGroup.length ?
primariesOfRegionsPerGroup[j] : -1;
- if (primary != currentPrimary) { // check for whether we see a new
primary
- int numReplicas = j - currentPrimaryIndex;
- if (numReplicas > 1) { // means consecutive primaries, indicating
co-location
- // decide to select this primary region id or not
- double currentRandom = ThreadLocalRandom.current().nextDouble();
- // we don't know how many region replicas are co-hosted, we will
randomly select one
- // using reservoir sampling
(http://gregable.com/2007/10/reservoir-sampling.html)
- if (currentRandom > currentLargestRandom) {
- selectedPrimaryIndex = currentPrimary;
- currentLargestRandom = currentRandom;
- }
+ Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;
+
+ // primariesOfRegionsPerGroup is a hashmap of count of primary index on a
server. a count > 1
+ // means that replicas are co-hosted
+ for(Map.Entry<Integer, ArrayList<Integer>> pair :
primariesOfRegionsPerGroup.entrySet()) {
Review comment:
s/LIst/ArrayList/ unless you are using explicit ArrayList methods?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java
##########
@@ -526,9 +526,9 @@ protected BalanceAction generate(BalancerClusterState
cluster) {
thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer);
}
if (thisRegion == -1) {
- if (cluster.regionsPerServer[thisServer].length > 0) {
+ if (cluster.regionsPerServer.get(thisServer).size() > 0) {
Review comment:
s/isEmpty()/size()/ and negate the check?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -80,22 +83,16 @@ protected double cost() {
* @param primariesOfRegions a sorted array of primary regions ids for the
regions hosted
* @return a sum of numReplicas-1 squared for each primary region in the
group.
*/
- protected final long costPerGroup(int[] primariesOfRegions) {
+ protected final long costPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegions) {
long cost = 0;
- int currentPrimary = -1;
- int currentPrimaryIndex = -1;
- // primariesOfRegions is a sorted array of primary ids of regions.
Replicas of regions
- // sharing the same primary will have consecutive numbers in the array.
- for (int j = 0; j <= primariesOfRegions.length; j++) {
- int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
- if (primary != currentPrimary) { // we see a new primary
- int numReplicas = j - currentPrimaryIndex;
- // square the cost
- if (numReplicas > 1) { // means consecutive primaries, indicating
co-location
- cost += (numReplicas - 1) * (numReplicas - 1);
- }
- currentPrimary = primary;
- currentPrimaryIndex = j;
+
+ // primariesOfRegionsPerGroup is a hashmap of count of primary index on a
server. a count > 1
+ // means that replicas are co-hosted
+ for(Map.Entry<Integer, ArrayList<Integer>> pair :
primariesOfRegions.entrySet()) {
Review comment:
s/List/ArrayList/
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
* @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
* @return a regionIndex for the selected primary or -1 if there is no
co-locating
*/
- int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[]
regionsPerGroup,
- int[] regionIndexToPrimaryIndex) {
- int currentPrimary = -1;
- int currentPrimaryIndex = -1;
- int selectedPrimaryIndex = -1;
+ int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegionsPerGroup,
+ Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
double currentLargestRandom = -1;
- // primariesOfRegionsPerGroup is a sorted array. Since it contains the
primary region
- // ids for the regions hosted in server, a consecutive repetition means
that replicas
- // are co-hosted
- for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
- int primary = j < primariesOfRegionsPerGroup.length ?
primariesOfRegionsPerGroup[j] : -1;
- if (primary != currentPrimary) { // check for whether we see a new
primary
- int numReplicas = j - currentPrimaryIndex;
- if (numReplicas > 1) { // means consecutive primaries, indicating
co-location
- // decide to select this primary region id or not
- double currentRandom = ThreadLocalRandom.current().nextDouble();
- // we don't know how many region replicas are co-hosted, we will
randomly select one
- // using reservoir sampling
(http://gregable.com/2007/10/reservoir-sampling.html)
- if (currentRandom > currentLargestRandom) {
- selectedPrimaryIndex = currentPrimary;
- currentLargestRandom = currentRandom;
- }
+ Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;
Review comment:
s/List/ArrayList/ ?
##########
File path:
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
* @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
* @return a regionIndex for the selected primary or -1 if there is no
co-locating
*/
- int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[]
regionsPerGroup,
- int[] regionIndexToPrimaryIndex) {
- int currentPrimary = -1;
- int currentPrimaryIndex = -1;
- int selectedPrimaryIndex = -1;
+ int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>>
primariesOfRegionsPerGroup,
+ Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
double currentLargestRandom = -1;
- // primariesOfRegionsPerGroup is a sorted array. Since it contains the
primary region
- // ids for the regions hosted in server, a consecutive repetition means
that replicas
- // are co-hosted
- for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
- int primary = j < primariesOfRegionsPerGroup.length ?
primariesOfRegionsPerGroup[j] : -1;
- if (primary != currentPrimary) { // check for whether we see a new
primary
- int numReplicas = j - currentPrimaryIndex;
- if (numReplicas > 1) { // means consecutive primaries, indicating
co-location
- // decide to select this primary region id or not
- double currentRandom = ThreadLocalRandom.current().nextDouble();
- // we don't know how many region replicas are co-hosted, we will
randomly select one
- // using reservoir sampling
(http://gregable.com/2007/10/reservoir-sampling.html)
- if (currentRandom > currentLargestRandom) {
- selectedPrimaryIndex = currentPrimary;
- currentLargestRandom = currentRandom;
- }
+ Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;
+
+ // primariesOfRegionsPerGroup is a hashmap of count of primary index on a
server. a count > 1
Review comment:
For my info, whats a 'count of primary index on a server'? Thanks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]