This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 83d5a9e HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)
83d5a9e is described below
commit 83d5a9ea2769193812232349224340c9834f3cd4
Author: Bryan Beaudreault <[email protected]>
AuthorDate: Thu Mar 24 17:54:49 2022 -0400
HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)
Signed-off-by: Andrew Purtell <[email protected]>
---
.../hbase/mapreduce/TableInputFormatBase.java | 19 +++++++++-----
.../hbase/mapreduce/TestTableInputFormatBase.java | 30 +++++++++++++++++++++-
2 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
index 61672d4..92d1f11 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -139,6 +139,8 @@ public abstract class TableInputFormatBase
private TableRecordReader tableRecordReader = null;
/** The underlying {@link Connection} of the table. */
private Connection connection;
+ /** Used to generate splits based on region size. */
+ private RegionSizeCalculator regionSizeCalculator;
/** The reverse DNS lookup cache mapping: IPAddress => HostName */
@@ -288,8 +290,11 @@ public abstract class TableInputFormatBase
* @throws IOException throws IOException
*/
private List<InputSplit> oneInputSplitPerRegion() throws IOException {
- RegionSizeCalculator sizeCalculator =
- createRegionSizeCalculator(getRegionLocator(), getAdmin());
+ if (regionSizeCalculator == null) {
+ // Initialize here rather than with the other resources because this involves
+ // a full scan of meta, which can be heavy. We might as well only do it if/when necessary.
+ regionSizeCalculator = createRegionSizeCalculator(getRegionLocator(), getAdmin());
+ }
TableName tableName = getTable().getName();
@@ -302,7 +307,7 @@ public abstract class TableInputFormatBase
throw new IOException("Expecting at least one region.");
}
List<InputSplit> splits = new ArrayList<>(1);
- long regionSize = sizeCalculator.getRegionSize(regLoc.getRegionInfo().getRegionName());
+ long regionSize = regionSizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@@ -343,9 +348,9 @@ public abstract class TableInputFormatBase
String regionLocation;
regionLocation = reverseDNS(regionAddress);
- byte[] regionName = location.getRegionInfo().getRegionName();
- String encodedRegionName = location.getRegionInfo().getEncodedName();
- long regionSize = sizeCalculator.getRegionSize(regionName);
+ byte[] regionName = location.getRegion().getRegionName();
+ String encodedRegionName = location.getRegion().getEncodedName();
+ long regionSize = regionSizeCalculator.getRegionSize(regionName);
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@@ -597,6 +602,7 @@ public abstract class TableInputFormatBase
this.regionLocator = connection.getRegionLocator(tableName);
this.admin = connection.getAdmin();
this.connection = connection;
+ this.regionSizeCalculator = null;
}
@InterfaceAudience.Private
@@ -664,6 +670,7 @@ public abstract class TableInputFormatBase
table = null;
regionLocator = null;
connection = null;
+ regionSizeCalculator = null;
}
private void close(Closeable... closables) throws IOException {
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
index 4436ee1..ee46726 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormatBase.java
@@ -22,7 +22,6 @@ import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
-
import java.io.IOException;
import java.net.Inet6Address;
import java.net.InetAddress;
@@ -55,6 +54,7 @@ import org.apache.hadoop.mapreduce.JobContext;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
@@ -66,6 +66,34 @@ public class TestTableInputFormatBase {
HBaseClassTestRule.forClass(TestTableInputFormatBase.class);
@Test
+ public void testReuseRegionSizeCalculator() throws IOException {
+ JobContext context = mock(JobContext.class);
+ Configuration conf = HBaseConfiguration.create();
+ conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
+ ConnectionForMergeTesting.class.getName());
+ conf.set(TableInputFormat.INPUT_TABLE, "testTable");
+ conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
+ when(context.getConfiguration()).thenReturn(conf);
+
+ TableInputFormat format = Mockito.spy(new TableInputFormatForMergeTesting());
+ format.setConf(conf);
+ // initialize so that table is set, otherwise cloneOnFinish
+ // will be true and each getSplits call will re-initialize everything
+ format.initialize(context);
+ format.getSplits(context);
+ format.getSplits(context);
+
+ // re-initialize which will cause the next getSplits call to create a new RegionSizeCalculator
+ format.initialize(context);
+ format.getSplits(context);
+ format.getSplits(context);
+
+ // should only be 2 despite calling getSplits 4 times
+ Mockito.verify(format, Mockito.times(2))
+ .createRegionSizeCalculator(Mockito.any(), Mockito.any());
+ }
+
+ @Test
public void testTableInputFormatBaseReverseDNSForIPv6()
throws UnknownHostException {
String address = "ipv6.google.com";