This is an automated email from the ASF dual-hosted git repository.

apurtell pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 83d5a9e  HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)
83d5a9e is described below

commit 83d5a9ea2769193812232349224340c9834f3cd4
Author: Bryan Beaudreault <[email protected]>
AuthorDate: Thu Mar 24 17:54:49 2022 -0400

    HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)
    
    Signed-off-by: Andrew Purtell <[email protected]>
---
 .../hbase/mapreduce/TableInputFormatBase.java      | 19 +++++++++-----
 .../hbase/mapreduce/TestTableInputFormatBase.java  | 30 +++++++++++++++++++++-
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
index 61672d4..92d1f11 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -139,6 +139,8 @@ public abstract class TableInputFormatBase
   private TableRecordReader tableRecordReader = null;
   /** The underlying {@link Connection} of the table. */
   private Connection connection;
+  /** Used to generate splits based on region size. */
+  private RegionSizeCalculator regionSizeCalculator;
 
 
   /** The reverse DNS lookup cache mapping: IPAddress => HostName */
@@ -288,8 +290,11 @@ public abstract class TableInputFormatBase
    * @throws IOException throws IOException
    */
   private List<InputSplit> oneInputSplitPerRegion() throws IOException {
-    RegionSizeCalculator sizeCalculator =
-        createRegionSizeCalculator(getRegionLocator(), getAdmin());
+    if (regionSizeCalculator == null) {
+      // Initialize here rather than with the other resources because this involves
+      // a full scan of meta, which can be heavy. We might as well only do it if/when necessary.
+      regionSizeCalculator = createRegionSizeCalculator(getRegionLocator(), getAdmin());
+    }
 
     TableName tableName = getTable().getName();
 
@@ -302,7 +307,7 @@ public abstract class TableInputFormatBase
         throw new IOException("Expecting at least one region.");
       }
       List<InputSplit> splits = new ArrayList<>(1);
-      long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
+      long regionSize = regionSizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
       // In the table input format for single table we do not need to
       // store the scan object in table split because it can be memory intensive and redundant
       // information to what is already stored in conf SCAN. See HBASE-25212
@@ -343,9 +348,9 @@ public abstract class TableInputFormatBase
         String regionLocation;
         regionLocation = reverseDNS(regionAddress);
 
-        byte[] regionName = location.getRegionInfo().getRegionName();
-        String encodedRegionName = location.getRegionInfo().getEncodedName();
-        long regionSize = sizeCalculator.getRegionSize(regionName);
+        byte[] regionName = location.getRegion().getRegionName();
+        String encodedRegionName = location.getRegion().getEncodedName();
+        long regionSize = regionSizeCalculator.getRegionSize(regionName);
         // In the table input format for single table we do not need to
         // store the scan object in table split because it can be memory intensive and redundant
         // information to what is already stored in conf SCAN. See HBASE-25212
@@ -597,6 +602,7 @@ public abstract class TableInputFormatBase
     this.regionLocator = connection.getRegionLocator(tableName);
     this.admin = connection.getAdmin();
     this.connection = connection;
+    this.regionSizeCalculator = null;
   }
 
   @InterfaceAudience.Private
@@ -664,6 +670,7 @@ public abstract class TableInputFormatBase
     table = null;
     regionLocator = null;
     connection = null;
+    regionSizeCalculator = null;
   }
 
   private void close(Closeable... closables) throws IOException {
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
index 4436ee1..ee46726 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
@@ -22,7 +22,6 @@ import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.anyBoolean;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
-
 import java.io.IOException;
 import java.net.Inet6Address;
 import java.net.InetAddress;
@@ -55,6 +54,7 @@ import org.apache.hadoop.mapreduce.JobContext;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -66,6 +66,34 @@ public class TestTableInputFormatBase {
       HBaseClassTestRule.forClass(TestTableInputFormatBase.class);
 
   @Test
+  public void testReuseRegionSizeCalculator() throws IOException {
+    JobContext context = mock(JobContext.class);
+    Configuration conf = HBaseConfiguration.create();
+    conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
+      ConnectionForMergeTesting.class.getName());
+    conf.set(TableInputFormat.INPUT_TABLE, "testTable");
+    conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
+    when(context.getConfiguration()).thenReturn(conf);
+
+    TableInputFormat format = Mockito.spy(new TableInputFormatForMergeTesting());
+    format.setConf(conf);
+    // initialize so that table is set, otherwise cloneOnFinish
+    // will be true and each getSplits call will re-initialize everything
+    format.initialize(context);
+    format.getSplits(context);
+    format.getSplits(context);
+
+    // re-initialize which will cause the next getSplits call to create a new RegionSizeCalculator
+    format.initialize(context);
+    format.getSplits(context);
+    format.getSplits(context);
+
+    // should only be 2 despite calling getSplits 4 times
+    Mockito.verify(format, Mockito.times(2))
+      .createRegionSizeCalculator(Mockito.any(), Mockito.any());
+  }
+
+  @Test
   public void testTableInputFormatBaseReverseDNSForIPv6()
       throws UnknownHostException {
     String address = "ipv6.google.com";

Reply via email to