This is an automated email from the ASF dual-hosted git repository.
stack pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 3331e83 HBASE-25677 Server+table counters on each scan #nextRaw
invocation becomes a bottleneck when heavy load (#3061)
3331e83 is described below
commit 3331e8307ae0da8dc0779751aafd38f034837fb5
Author: Michael Stack <[email protected]>
AuthorDate: Thu Mar 18 11:33:45 2021 -0700
HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes
a bottleneck when heavy load (#3061)
Don't have every handler update regionserver metrics on each
scan#nextRaw; instead, do a batch update just before Scan
returns. Otherwise, all running handlers end up contending
on metrics update.
M
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
Update of regionserver metrics counters moved out to caller where
can be done as a batch update instead of per-next.
M
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
Class doc to encourage batch updating metrics.
Remove the single update as unused anymore.
M
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
Count calls to nextRaw. Update regionserver count in finally block when
scan is done rather than per nextRaw call. Move all metrics updates to
finally.
Signed-off-by: Reid Chan <[email protected]>
Signed-off-by: Baiqiang Zhao <ZhaoBQ>
---
.../org/apache/hadoop/hbase/regionserver/HRegion.java | 3 ---
.../hbase/regionserver/MetricsRegionServer.java | 18 ++++++------------
.../hadoop/hbase/regionserver/RSRpcServices.java | 19 +++++++++++++------
3 files changed, 19 insertions(+), 21 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index d54dc79..0f3649b 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -7282,9 +7282,6 @@ public class HRegion implements HeapSize,
PropagatingConfigurationObserver, Regi
metricsRegion.updateReadRequestCount();
}
}
- if (rsServices != null && rsServices.getMetrics() != null) {
-
rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable());
- }
// If the size limit was reached it means a partial Result is being
returned. Returning a
// partial Result means that we should not reset the filters; filters
should only be reset in
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
index 00324f3..f593f39 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
@@ -28,12 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
/**
- * <p>
- * This class is for maintaining the various regionserver statistics
- * and publishing them through the metrics interfaces.
- * </p>
+ * Maintains regionserver statistics and publishes them through the metrics
interfaces.
* This class has a number of metrics variables that are publicly accessible;
- * these variables (objects) have methods to update their values.
+ * these variables (objects) have methods to update their values. Batch your
updates rather than
+ * call on each instance else all threads will do nothing but contend trying
to maintain metric
+ * counters!
*/
@InterfaceStability.Evolving
@InterfaceAudience.Private
@@ -52,7 +51,9 @@ public class MetricsRegionServer {
private MetricRegistry metricRegistry;
private Timer bulkLoadTimer;
+ // Incremented once for each call to Scan#nextRaw
private Meter serverReadQueryMeter;
+ // Incremented per write.
private Meter serverWriteQueryMeter;
protected long slowMetricTime;
protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds
@@ -272,13 +273,6 @@ public class MetricsRegionServer {
this.serverReadQueryMeter.mark(count);
}
- public void updateReadQueryMeter(TableName tn) {
- if (tableMetrics != null && tn != null) {
- tableMetrics.updateTableReadQueryMeter(tn);
- }
- this.serverReadQueryMeter.mark();
- }
-
public void updateWriteQueryMeter(TableName tn, long count) {
if (tableMetrics != null && tn != null) {
tableMetrics.updateTableWriteQueryMeter(tn, count);
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 59ca7c8..288ba76 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -3282,10 +3282,13 @@ public class RSRpcServices implements
HBaseRPCErrorHandler,
// arbitrary 32. TODO: keep record of general size of results being
returned.
List<Cell> values = new ArrayList<>(32);
region.startRegionOperation(Operation.SCAN);
+ long before = EnvironmentEdgeManager.currentTime();
+ // Used to check if we've matched the row limit set on the Scan
+ int numOfCompleteRows = 0;
+ // Count of times we call nextRaw; can be > numOfCompleteRows.
+ int numOfNextRawCalls = 0;
try {
int numOfResults = 0;
- int numOfCompleteRows = 0;
- long before = EnvironmentEdgeManager.currentTime();
synchronized (scanner) {
boolean stale = (region.getRegionInfo().getReplicaId() != 0);
boolean clientHandlesPartials =
@@ -3341,6 +3344,7 @@ public class RSRpcServices implements
HBaseRPCErrorHandler,
// Collect values to be returned here
moreRows = scanner.nextRaw(values, scannerContext);
+ numOfNextRawCalls++;
if (!values.isEmpty()) {
if (limitOfRows > 0) {
@@ -3432,18 +3436,21 @@ public class RSRpcServices implements
HBaseRPCErrorHandler,
builder.setScanMetrics(metricBuilder.build());
}
}
+ } finally {
+ region.closeRegionOperation();
+ // Update serverside metrics, even on error.
long end = EnvironmentEdgeManager.currentTime();
long responseCellSize = context != null ? context.getResponseCellSize()
: 0;
region.getMetrics().updateScanTime(end - before);
final MetricsRegionServer metricsRegionServer =
regionServer.getMetrics();
if (metricsRegionServer != null) {
metricsRegionServer.updateScanSize(
- region.getTableDescriptor().getTableName(), responseCellSize);
+ region.getTableDescriptor().getTableName(), responseCellSize);
metricsRegionServer.updateScanTime(
- region.getTableDescriptor().getTableName(), end - before);
+ region.getTableDescriptor().getTableName(), end - before);
+
metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(),
+ numOfNextRawCalls);
}
- } finally {
- region.closeRegionOperation();
}
// coprocessor postNext hook
if (region.getCoprocessorHost() != null) {