Repository: hbase
Updated Branches:
  refs/heads/master 6905d272d -> 7d3a89ce8


HBASE-15396 Enhance mapreduce.TableSplit to add encoded region name

Signed-off-by: Sean Busbey <bus...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7d3a89ce
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7d3a89ce
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7d3a89ce

Branch: refs/heads/master
Commit: 7d3a89ce8e07d9fd1c31b4bd2324b71dd10ecef5
Parents: 6905d27
Author: Harsh J <ha...@cloudera.com>
Authored: Fri Mar 4 15:59:48 2016 +0530
Committer: Sean Busbey <bus...@apache.org>
Committed: Thu Mar 31 22:59:43 2016 -0500

----------------------------------------------------------------------
 .../mapreduce/MultiTableInputFormatBase.java    |  4 +-
 .../hbase/mapreduce/TableInputFormatBase.java   | 10 ++--
 .../hadoop/hbase/mapreduce/TableSplit.java      | 59 ++++++++++++++++----
 .../hadoop/hbase/mapreduce/TestTableSplit.java  | 22 +++++++-
 4 files changed, 77 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7d3a89ce/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
index 6f0075a..4931c3f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
@@ -223,11 +223,13 @@ public abstract class MultiTableInputFormatBase extends
                       keys.getFirst()[i], false);
               String regionHostname = hregionLocation.getHostname();
               HRegionInfo regionInfo = hregionLocation.getRegionInfo();
+              String encodedRegionName = regionInfo.getEncodedName();
               long regionSize = sizeCalculator.getRegionSize(
                       regionInfo.getRegionName());
 
               TableSplit split = new TableSplit(table.getName(),
-                      scan, splitStart, splitStop, regionHostname, regionSize);
+                      scan, splitStart, splitStop, regionHostname,
+                      encodedRegionName, regionSize);
 
               splits.add(split);
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/7d3a89ce/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
index 65b4efc..2cde4b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -300,9 +300,10 @@ extends InputFormat<ImmutableBytesWritable, Result> {
               keys.getSecond()[i] : stopRow;
   
           byte[] regionName = location.getRegionInfo().getRegionName();
+          String encodedRegionName = location.getRegionInfo().getEncodedName();
           long regionSize = sizeCalculator.getRegionSize(regionName);
           TableSplit split = new TableSplit(tableName, scan,
-            splitStart, splitStop, regionLocation, regionSize);
+            splitStart, splitStop, regionLocation, encodedRegionName, 
regionSize);
           splits.add(split);
           if (LOG.isDebugEnabled()) {
             LOG.debug("getSplits: split -> " + i + " -> " + split);
@@ -382,6 +383,7 @@ extends InputFormat<ImmutableBytesWritable, Result> {
       TableSplit ts = (TableSplit)list.get(count);
       TableName tableName = ts.getTable();
       String regionLocation = ts.getRegionLocation();
+      String encodedRegionName = ts.getEncodedRegionName();
       long regionSize = ts.getLength();
       if (regionSize >= dataSkewThreshold) {
         // if the current region size is larger than the data skew threshold,
@@ -390,9 +392,9 @@ extends InputFormat<ImmutableBytesWritable, Result> {
          //Set the size of child TableSplit as 1/2 of the region size. The exact size of the
          // MapReduce input splits is not far off.
         TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), 
splitKey, regionLocation,
-                regionSize / 2);
+                encodedRegionName, regionSize / 2);
         TableSplit t2 = new TableSplit(tableName, scan, splitKey, 
ts.getEndRow(), regionLocation,
-                regionSize - regionSize / 2);
+                encodedRegionName, regionSize - regionSize / 2);
         resultList.add(t1);
         resultList.add(t2);
         count++;
@@ -419,7 +421,7 @@ extends InputFormat<ImmutableBytesWritable, Result> {
           }
         }
         TableSplit t = new TableSplit(tableName, scan, splitStartKey, 
splitEndKey,
-                regionLocation, totalSize);
+                regionLocation, encodedRegionName, totalSize);
         resultList.add(t);
       }
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/7d3a89ce/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
index 850db81..1795909 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java
@@ -52,7 +52,9 @@ implements Writable, Comparable<TableSplit> {
   enum Version {
     UNVERSIONED(0),
     // Initial number we put on TableSplit when we introduced versioning.
-    INITIAL(-1);
+    INITIAL(-1),
+    // Added an encoded region name field for easier identification of split -> region
+    WITH_ENCODED_REGION_NAME(-2);
 
     final int code;
     static final Version[] byCode;
@@ -78,11 +80,12 @@ implements Writable, Comparable<TableSplit> {
     }
   }
 
-  private static final Version VERSION = Version.INITIAL;
+  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
   private TableName tableName;
   private byte [] startRow;
   private byte [] endRow;
   private String regionLocation;
+  private String encodedRegionName = "";
   private String scan = ""; // stores the serialized form of the Scan
   private long length; // Contains estimation of region size in bytes
 
@@ -95,6 +98,7 @@ implements Writable, Comparable<TableSplit> {
   /**
    * Creates a new instance while assigning all variables.
    * Length of region is set to 0
+   * Encoded name of the region is set to blank
    *
    * @param tableName  The name of the current table.
    * @param scan The scan associated with this split.
@@ -109,6 +113,7 @@ implements Writable, Comparable<TableSplit> {
 
   /**
    * Creates a new instance while assigning all variables.
+   * Encoded name of region is set to blank
    *
    * @param tableName  The name of the current table.
    * @param scan The scan associated with this split.
@@ -118,6 +123,21 @@ implements Writable, Comparable<TableSplit> {
    */
   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] 
endRow,
       final String location, long length) {
+    this(tableName, scan, startRow, endRow, location, "", length);
+  }
+
+  /**
+   * Creates a new instance while assigning all variables.
+   *
+   * @param tableName  The name of the current table.
+   * @param scan The scan associated with this split.
+   * @param startRow  The start row of the split.
+   * @param endRow  The end row of the split.
+   * @param location  The location of the region.
+   * @param encodedRegionName The encoded name of the region.
+   */
+  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] 
endRow,
+      final String location, final String encodedRegionName, long length) {
     this.tableName = tableName;
     try {
       this.scan =
@@ -128,11 +148,13 @@ implements Writable, Comparable<TableSplit> {
     this.startRow = startRow;
     this.endRow = endRow;
     this.regionLocation = location;
+    this.encodedRegionName = encodedRegionName;
     this.length = length;
   }
 
   /**
    * Creates a new instance without a scanner.
+   * Length of region is set to 0
    *
    * @param tableName The name of the current table.
    * @param startRow The start row of the split.
@@ -228,6 +250,15 @@ implements Writable, Comparable<TableSplit> {
   }
 
   /**
+   * Returns the region's encoded name.
+   *
+   * @return The region's encoded name.
+   */
+  public String getEncodedRegionName() {
+    return encodedRegionName;
+  }
+
+  /**
    * Returns the length of the split.
    *
    * @return The length of the split.
@@ -271,6 +302,9 @@ implements Writable, Comparable<TableSplit> {
       scan = Bytes.toString(Bytes.readByteArray(in));
     }
     length = WritableUtils.readVLong(in);
+    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
+      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
+    }
   }
 
   /**
@@ -288,6 +322,7 @@ implements Writable, Comparable<TableSplit> {
     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
     Bytes.writeByteArray(out, Bytes.toBytes(scan));
     WritableUtils.writeVLong(out, length);
+    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
   }
 
   /**
@@ -316,6 +351,7 @@ implements Writable, Comparable<TableSplit> {
     sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
     sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
     sb.append(", region location: ").append(regionLocation);
+    sb.append(", encoded region name: ").append(encodedRegionName);
     sb.append(")");
     return sb.toString();
   }
@@ -348,13 +384,14 @@ implements Writable, Comparable<TableSplit> {
       regionLocation.equals(((TableSplit)o).regionLocation);
   }
 
-    @Override
-    public int hashCode() {
-        int result = tableName != null ? tableName.hashCode() : 0;
-        result = 31 * result + (scan != null ? scan.hashCode() : 0);
-        result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 
0);
-        result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
-        result = 31 * result + (regionLocation != null ? 
regionLocation.hashCode() : 0);
-        return result;
-    }
+  @Override
+  public int hashCode() {
+    int result = tableName != null ? tableName.hashCode() : 0;
+    result = 31 * result + (scan != null ? scan.hashCode() : 0);
+    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
+    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
+    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() 
: 0);
+    result = 31 * result + (encodedRegionName != null ? 
encodedRegionName.hashCode() : 0);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/7d3a89ce/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
index 59f787f..47a5ca7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java
@@ -93,13 +93,31 @@ public class TestTableSplit {
             "location");
     String str =
         "HBase table split(table name: table, scan: , start row: row-start, "
-            + "end row: row-end, region location: location)";
+            + "end row: row-end, region location: location, "
+            + "encoded region name: )";
+    Assert.assertEquals(str, split.toString());
+
+    split =
+        new TableSplit(TableName.valueOf("table"), null, 
"row-start".getBytes(),
+            "row-end".getBytes(), "location", "encoded-region-name", 1000L);
+    str =
+        "HBase table split(table name: table, scan: , start row: row-start, "
+            + "end row: row-end, region location: location, "
+            + "encoded region name: encoded-region-name)";
     Assert.assertEquals(str, split.toString());
 
     split = new TableSplit((TableName) null, null, null, null);
     str =
         "HBase table split(table name: null, scan: , start row: null, "
-            + "end row: null, region location: null)";
+            + "end row: null, region location: null, "
+            + "encoded region name: )";
+    Assert.assertEquals(str, split.toString());
+
+    split = new TableSplit((TableName) null, null, null, null, null, null, 
1000L);
+    str =
+        "HBase table split(table name: null, scan: , start row: null, "
+            + "end row: null, region location: null, "
+            + "encoded region name: null)";
     Assert.assertEquals(str, split.toString());
   }
 }

Reply via email to