saintstack commented on a change in pull request #3575:
URL: https://github.com/apache/hbase/pull/3575#discussion_r686471173



##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java
##########
@@ -42,13 +42,13 @@
    */
   int pickRandomRegion(BalancerClusterState cluster, int server, double 
chanceOfNoSwap) {
     // Check to see if this is just a move.
-    if (cluster.regionsPerServer[server].length == 0
+    if (cluster.regionsPerServer.get(server).size() == 0

Review comment:
       s/isEmpty()/size() == 0/ ... its faster.

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -44,11 +46,12 @@ final void prepare(BalancerClusterState cluster) {
 
   protected final long getMaxCost(BalancerClusterState cluster) {
     // max cost is the case where every region replica is hosted together 
regardless of host
-    int[] primariesOfRegions = new int[cluster.numRegions];
-    System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions, 
0,
-      cluster.regions.length);
-
-    Arrays.sort(primariesOfRegions);
+    HashMap<Integer, ArrayList<Integer>> primariesOfRegions =
+      new HashMap<Integer, ArrayList<Integer>>(cluster.numRegions);

Review comment:
       We don't do this double allocation on a single line... we give each 
allocation and assign its own line... easier to read.
   
   Left hand size should be Map rather than HashMap? Ditto for List and 
ArrayLIst.

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -80,22 +83,16 @@ protected double cost() {
    * @param primariesOfRegions a sorted array of primary regions ids for the 
regions hosted
    * @return a sum of numReplicas-1 squared for each primary region in the 
group.
    */
-  protected final long costPerGroup(int[] primariesOfRegions) {
+  protected final long costPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegions) {

Review comment:
       s/Map/HashMap/ and s/List/ArrayList/ ?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
    * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
    * @return a regionIndex for the selected primary or -1 if there is no 
co-locating
    */
-  int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] 
regionsPerGroup,
-    int[] regionIndexToPrimaryIndex) {
-    int currentPrimary = -1;
-    int currentPrimaryIndex = -1;
-    int selectedPrimaryIndex = -1;
+  int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegionsPerGroup,

Review comment:
       s/List/ArrayList/ and s/Map/HashMap/?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaHostCostFunction.java
##########
@@ -32,7 +35,7 @@
     "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
   private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
 
-  private int[][] primariesOfRegionsPerGroup;
+  private ArrayList<HashMap<Integer, ArrayList<Integer>>> 
primariesOfRegionsPerGroup;

Review comment:
       s/Map/HashMap/

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
##########
@@ -128,11 +128,11 @@ public void postMasterStartupInitialize() {
   protected final boolean idleRegionServerExist(BalancerClusterState c){
     boolean isServerExistsWithMoreRegions = false;
     boolean isServerExistsWithZeroRegions = false;
-    for (int[] serverList: c.regionsPerServer){
-      if (serverList.length > 1) {
+    for (ArrayList<Integer> serverList: c.regionsPerServer){

Review comment:
       s/List/ArrayList/ ?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaHostCostFunction.java
##########
@@ -32,7 +35,7 @@
     "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
   private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
 
-  private int[][] primariesOfRegionsPerGroup;
+  private ArrayList<HashMap<Integer, ArrayList<Integer>>> 
primariesOfRegionsPerGroup;

Review comment:
       What is the advantage of List of int [] ?
   
   s/List/ArrayList/ ?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
    * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
    * @return a regionIndex for the selected primary or -1 if there is no 
co-locating
    */
-  int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] 
regionsPerGroup,
-    int[] regionIndexToPrimaryIndex) {
-    int currentPrimary = -1;
-    int currentPrimaryIndex = -1;
-    int selectedPrimaryIndex = -1;
+  int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegionsPerGroup,
+    Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
     double currentLargestRandom = -1;
-    // primariesOfRegionsPerGroup is a sorted array. Since it contains the 
primary region
-    // ids for the regions hosted in server, a consecutive repetition means 
that replicas
-    // are co-hosted
-    for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
-      int primary = j < primariesOfRegionsPerGroup.length ? 
primariesOfRegionsPerGroup[j] : -1;
-      if (primary != currentPrimary) { // check for whether we see a new 
primary
-        int numReplicas = j - currentPrimaryIndex;
-        if (numReplicas > 1) { // means consecutive primaries, indicating 
co-location
-          // decide to select this primary region id or not
-          double currentRandom = ThreadLocalRandom.current().nextDouble();
-          // we don't know how many region replicas are co-hosted, we will 
randomly select one
-          // using reservoir sampling 
(http://gregable.com/2007/10/reservoir-sampling.html)
-          if (currentRandom > currentLargestRandom) {
-            selectedPrimaryIndex = currentPrimary;
-            currentLargestRandom = currentRandom;
-          }
+    Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;
+
+    // primariesOfRegionsPerGroup is a hashmap of count of primary index on a 
server. a count > 1
+    // means that replicas are co-hosted
+    for(Map.Entry<Integer, ArrayList<Integer>> pair : 
primariesOfRegionsPerGroup.entrySet()) {

Review comment:
       s/LIst/ArrayList/ unless you are using explicit ArrayList methods?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java
##########
@@ -526,9 +526,9 @@ protected BalanceAction generate(BalancerClusterState 
cluster) {
         thisRegion = pickLowestLocalRegionOnServer(cluster, thisServer);
       }
       if (thisRegion == -1) {
-        if (cluster.regionsPerServer[thisServer].length > 0) {
+        if (cluster.regionsPerServer.get(thisServer).size() > 0) {

Review comment:
       s/isEmpty()/size()/ and negate the check?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
##########
@@ -80,22 +83,16 @@ protected double cost() {
    * @param primariesOfRegions a sorted array of primary regions ids for the 
regions hosted
    * @return a sum of numReplicas-1 squared for each primary region in the 
group.
    */
-  protected final long costPerGroup(int[] primariesOfRegions) {
+  protected final long costPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegions) {
     long cost = 0;
-    int currentPrimary = -1;
-    int currentPrimaryIndex = -1;
-    // primariesOfRegions is a sorted array of primary ids of regions. 
Replicas of regions
-    // sharing the same primary will have consecutive numbers in the array.
-    for (int j = 0; j <= primariesOfRegions.length; j++) {
-      int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
-      if (primary != currentPrimary) { // we see a new primary
-        int numReplicas = j - currentPrimaryIndex;
-        // square the cost
-        if (numReplicas > 1) { // means consecutive primaries, indicating 
co-location
-          cost += (numReplicas - 1) * (numReplicas - 1);
-        }
-        currentPrimary = primary;
-        currentPrimaryIndex = j;
+
+    // primariesOfRegionsPerGroup is a hashmap of count of primary index on a 
server. a count > 1
+    // means that replicas are co-hosted
+    for(Map.Entry<Integer, ArrayList<Integer>> pair : 
primariesOfRegions.entrySet()) {

Review comment:
       s/List/ArrayList/

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
    * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
    * @return a regionIndex for the selected primary or -1 if there is no 
co-locating
    */
-  int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] 
regionsPerGroup,
-    int[] regionIndexToPrimaryIndex) {
-    int currentPrimary = -1;
-    int currentPrimaryIndex = -1;
-    int selectedPrimaryIndex = -1;
+  int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegionsPerGroup,
+    Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
     double currentLargestRandom = -1;
-    // primariesOfRegionsPerGroup is a sorted array. Since it contains the 
primary region
-    // ids for the regions hosted in server, a consecutive repetition means 
that replicas
-    // are co-hosted
-    for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
-      int primary = j < primariesOfRegionsPerGroup.length ? 
primariesOfRegionsPerGroup[j] : -1;
-      if (primary != currentPrimary) { // check for whether we see a new 
primary
-        int numReplicas = j - currentPrimaryIndex;
-        if (numReplicas > 1) { // means consecutive primaries, indicating 
co-location
-          // decide to select this primary region id or not
-          double currentRandom = ThreadLocalRandom.current().nextDouble();
-          // we don't know how many region replicas are co-hosted, we will 
randomly select one
-          // using reservoir sampling 
(http://gregable.com/2007/10/reservoir-sampling.html)
-          if (currentRandom > currentLargestRandom) {
-            selectedPrimaryIndex = currentPrimary;
-            currentLargestRandom = currentRandom;
-          }
+    Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;

Review comment:
       s/List/ArrayList/ ?

##########
File path: 
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java
##########
@@ -39,42 +44,37 @@
    * @param regionIndexToPrimaryIndex Cluster.regionsIndexToPrimaryIndex
    * @return a regionIndex for the selected primary or -1 if there is no 
co-locating
    */
-  int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] 
regionsPerGroup,
-    int[] regionIndexToPrimaryIndex) {
-    int currentPrimary = -1;
-    int currentPrimaryIndex = -1;
-    int selectedPrimaryIndex = -1;
+  int selectCoHostedRegionPerGroup(HashMap<Integer, ArrayList<Integer>> 
primariesOfRegionsPerGroup,
+    Collection<Integer> regionsPerGroup, int[] regionIndexToPrimaryIndex) {
+
     double currentLargestRandom = -1;
-    // primariesOfRegionsPerGroup is a sorted array. Since it contains the 
primary region
-    // ids for the regions hosted in server, a consecutive repetition means 
that replicas
-    // are co-hosted
-    for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
-      int primary = j < primariesOfRegionsPerGroup.length ? 
primariesOfRegionsPerGroup[j] : -1;
-      if (primary != currentPrimary) { // check for whether we see a new 
primary
-        int numReplicas = j - currentPrimaryIndex;
-        if (numReplicas > 1) { // means consecutive primaries, indicating 
co-location
-          // decide to select this primary region id or not
-          double currentRandom = ThreadLocalRandom.current().nextDouble();
-          // we don't know how many region replicas are co-hosted, we will 
randomly select one
-          // using reservoir sampling 
(http://gregable.com/2007/10/reservoir-sampling.html)
-          if (currentRandom > currentLargestRandom) {
-            selectedPrimaryIndex = currentPrimary;
-            currentLargestRandom = currentRandom;
-          }
+    Map.Entry<Integer, ArrayList<Integer>> selectedPrimaryIndexEntry = null;
+
+    // primariesOfRegionsPerGroup is a hashmap of count of primary index on a 
server. a count > 1

Review comment:
       For my info, whats a 'count of primary index on a server'? Thanks.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to