This is an automated email from the ASF dual-hosted git repository.
stack pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 8f16e34 HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned (#3532)
8f16e34 is described below
commit 8f16e34eb277097678de73aab1d242c959511ef7
Author: Bryan Beaudreault <[email protected]>
AuthorDate: Tue Aug 10 23:38:06 2021 -0400
HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned (#3532)
Signed-off-by: stack <[email protected]>
---
.../java/org/apache/hadoop/hbase/client/Get.java | 23 ++++++
.../apache/hadoop/hbase/protobuf/ProtobufUtil.java | 2 +-
.../hadoop/hbase/shaded/protobuf/ProtobufUtil.java | 9 ++-
.../src/main/protobuf/Client.proto | 2 +
.../apache/hadoop/hbase/regionserver/HRegion.java | 36 ++++++---
.../hadoop/hbase/regionserver/RSRpcServices.java | 10 ++-
.../hbase/TestPartialResultsFromClientSide.java | 48 ++++++++++++
.../hadoop/hbase/regionserver/TestHRegion.java | 85 ++++++++++++++++++++++
8 files changed, 202 insertions(+), 13 deletions(-)
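For context, a minimal client-side sketch of how the new per-Get limit introduced by this change could be used. The connection setup, table name, row key, and the 1 MB limit below are illustrative assumptions only, not part of this patch; the pattern follows the new testGetPartialResults test, where the caller checks Result#mayHaveMoreCellsInRow() and pages through the rest of the row if the limit was hit.

import java.io.IOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class GetMaxResultSizeExample {
  public static void main(String[] args) throws IOException {
    try (Connection connection = ConnectionFactory.createConnection();
         Table table = connection.getTable(TableName.valueOf("example_table"))) {
      // Cap this single Get at roughly 1 MB of cells; the default of -1 means
      // no per-Get limit is applied.
      Get get = new Get(Bytes.toBytes("example_row"))
          .setMaxResultSize(1024 * 1024);
      Result result = table.get(get);
      if (result.mayHaveMoreCellsInRow()) {
        // Partial result: the row did not fit under the limit, so re-issue the
        // Get with a filter that resumes after the last cell returned.
      }
    }
  }
}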
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
index a671b9f..53b7154 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
@@ -76,6 +76,7 @@ public class Get extends Query implements Row {
private boolean checkExistenceOnly = false;
private boolean closestRowBefore = false;
private Map<byte [], NavigableSet<byte []>> familyMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+ private long maxResultSize = -1;
/**
* Create a Get operation for the specified row.
@@ -339,6 +340,21 @@ public class Get extends Query implements Row {
return this;
}
+ /**
+ * Set the maximum result size. The default is -1; this means that no specific
+ * maximum result size will be set for this Get.
+ *
+ * If set to a value greater than zero, the server may respond with a Result where
+ * {@link Result#mayHaveMoreCellsInRow()} is true. The user is required to handle
+ * this case.
+ *
+ * @param maxResultSize The maximum result size in bytes
+ */
+ public Get setMaxResultSize(long maxResultSize) {
+ this.maxResultSize = maxResultSize;
+ return this;
+ }
+
/* Accessors */
/**
@@ -459,6 +475,13 @@ public class Get extends Query implements Row {
}
/**
+ * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
+ */
+ public long getMaxResultSize() {
+ return maxResultSize;
+ }
+
+ /**
* Compile the details beyond the scope of getFingerprint (row, columns,
* timestamps, etc.) into a Map along with the fingerprinted information.
* Useful for debugging, logging, and administration tools.
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
index 5a01af9..1c17866 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
@@ -1382,7 +1382,7 @@ public final class ProtobufUtil {
return (cells == null || cells.isEmpty())
? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT)
- : Result.create(cells, null, proto.getStale());
+ : Result.create(cells, null, proto.getStale(), proto.getPartial());
}
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
index c2544f6..d6c7811 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
@@ -592,6 +592,9 @@ public final class ProtobufUtil {
if (proto.hasLoadColumnFamiliesOnDemand()) {
get.setLoadColumnFamiliesOnDemand(proto.getLoadColumnFamiliesOnDemand());
}
+ if (proto.hasMaxResultSize()) {
+ get.setMaxResultSize(proto.getMaxResultSize());
+ }
return get;
}
@@ -1256,6 +1259,9 @@ public final class ProtobufUtil {
if (loadColumnFamiliesOnDemand != null) {
builder.setLoadColumnFamiliesOnDemand(loadColumnFamiliesOnDemand);
}
+ if (get.getMaxResultSize() > 0) {
+ builder.setMaxResultSize(get.getMaxResultSize());
+ }
return builder.build();
}
@@ -1457,6 +1463,7 @@ public final class ProtobufUtil {
ClientProtos.Result.Builder builder = ClientProtos.Result.newBuilder();
builder.setAssociatedCellCount(size);
builder.setStale(result.isStale());
+ builder.setPartial(result.mayHaveMoreCellsInRow());
return builder.build();
}
@@ -1547,7 +1554,7 @@ public final class ProtobufUtil {
return (cells == null || cells.isEmpty())
? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT)
- : Result.create(cells, null, proto.getStale());
+ : Result.create(cells, null, proto.getStale(), proto.getPartial());
}
diff --git a/hbase-protocol-shaded/src/main/protobuf/Client.proto b/hbase-protocol-shaded/src/main/protobuf/Client.proto
index 13917b6..7081d50 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Client.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Client.proto
@@ -90,6 +90,8 @@ message Get {
optional Consistency consistency = 12 [default = STRONG];
repeated ColumnFamilyTimeRange cf_time_range = 13;
optional bool load_column_families_on_demand = 14; /* DO NOT add defaults to load_column_families_on_demand. */
+
+ optional uint64 max_result_size = 15;
}
message Result {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 6628328..9751db8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -146,6 +146,7 @@ import org.apache.hadoop.hbase.quotas.RegionServerSpaceQuotaManager;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
+import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector;
@@ -3864,8 +3865,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
Result result;
if (returnResults) {
// convert duplicate increment/append to get
- List<Cell> results = region.get(toGet(mutation), false, nonceGroup, nonce);
- result = Result.create(results);
+ result = region.get(toGet(mutation), false, nonceGroup, nonce);
} else {
result = Result.EMPTY_RESULT;
}
@@ -7497,9 +7497,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public Result get(final Get get) throws IOException {
prepareGet(get);
- List<Cell> results = get(get, true);
- boolean stale = this.getRegionInfo().getReplicaId() != 0;
- return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale);
+ return get(get, true, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
void prepareGet(final Get get) throws IOException {
@@ -7518,11 +7516,31 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public List<Cell> get(Get get, boolean withCoprocessor) throws IOException {
- return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE);
+ return getInternal(get, null, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE);
}
- private List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, long nonce)
- throws IOException {
+ private Result get(Get get, boolean withCoprocessor, long nonceGroup, long nonce)
+ throws IOException {
+ ScannerContext scannerContext = get.getMaxResultSize() > 0
+ ? ScannerContext.newBuilder()
+ .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize())
+ .build()
+ : null;
+
+ List<Cell> result = getInternal(get, scannerContext, withCoprocessor, nonceGroup, nonce);
+ boolean stale = this.getRegionInfo().getReplicaId() != 0;
+ boolean mayHaveMoreCellsInRow =
+ scannerContext != null && scannerContext.mayHaveMoreCellsInRow();
+
+ return Result.create(
+ result,
+ get.isCheckExistenceOnly() ? !result.isEmpty() : null,
+ stale,
+ mayHaveMoreCellsInRow);
+ }
+
+ private List<Cell> getInternal(Get get, ScannerContext scannerContext, boolean withCoprocessor,
+ long nonceGroup, long nonce) throws IOException {
List<Cell> results = new ArrayList<>();
long before = EnvironmentEdgeManager.currentTime();
@@ -7539,7 +7557,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
}
try (RegionScanner scanner = getScanner(scan, null, nonceGroup, nonce)) {
List<Cell> tmp = new ArrayList<>();
- scanner.next(tmp);
+ scanner.next(tmp, scannerContext);
// Copy EC to heap, then close the scanner.
// This can be an EXPENSIVE call. It may make an extra copy from offheap to onheap buffers.
// See more details in HBASE-26036.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 06e7ccf..b715c09 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -2668,10 +2668,15 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
if (scan.getLoadColumnFamiliesOnDemandValue() == null) {
scan.setLoadColumnFamiliesOnDemand(region.isLoadingCfsOnDemandDefault());
}
+
+ ScannerContext scannerContext = ScannerContext.newBuilder()
+ .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize())
+ .build();
+
RegionScannerImpl scanner = null;
try {
scanner = region.getScanner(scan);
- scanner.next(results);
+ scanner.next(results, scannerContext);
} finally {
if (scanner != null) {
if (closeCallBack == null) {
@@ -2696,7 +2701,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
}
region.metricsUpdateForGet(results, before);
- return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale);
+ return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale,
+ scannerContext.mayHaveMoreCellsInRow());
}
private void checkBatchSizeAndLogLargeSize(MultiRequest request) throws ServiceException {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
index 4e2d133..19fb996 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
@@ -31,18 +31,26 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.client.ClientScanner;
import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.BinaryComparator;
+import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
+import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.FilterListWithAND;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.FirstKeyValueMatchingQualifiersFilter;
+import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.RandomRowFilter;
+import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -136,6 +144,46 @@ public class TestPartialResultsFromClientSide {
TEST_UTIL.shutdownMiniCluster();
}
+ @Test
+ public void testGetPartialResults() throws Exception {
+ byte[] row = ROWS[0];
+
+ Result result;
+ int cf = 0;
+ int qf = 0;
+ int total = 0;
+
+ do {
+ // this will ensure we always return only 1 result
+ Get get = new Get(row)
+ .setMaxResultSize(1);
+
+ // we want to page through the entire row; this filter ensures we always get the next unseen cell
+ if (total > 0) {
+ get.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL,
+ new ColumnRangeFilter(QUALIFIERS[qf], true, null, false),
+ new FamilyFilter(CompareOperator.GREATER_OR_EQUAL, new BinaryComparator(FAMILIES[cf]))));
+ }
+
+ // all values are the same, but there should be a value
+ result = TABLE.get(get);
+ assertTrue(String.format("Value for family %s (# %s) and qualifier %s (#
%s)",
+ Bytes.toStringBinary(FAMILIES[cf]), cf,
Bytes.toStringBinary(QUALIFIERS[qf]), qf),
+ Bytes.equals(VALUE, result.getValue(FAMILIES[cf], QUALIFIERS[qf])));
+
+ total++;
+ if (++qf >= NUM_QUALIFIERS) {
+ cf++;
+ qf = 0;
+ }
+ } while (result.mayHaveMoreCellsInRow());
+
+ // ensure we iterated all cells in row
+ assertEquals(NUM_COLS, total);
+ assertEquals(NUM_FAMILIES, cf);
+ assertEquals(0, qf);
+ }
+
/**
* Ensure that the expected key values appear in a result returned from a scanner that is
* combining partial results into complete results
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 9763841..3d00eb8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -7861,4 +7861,89 @@ public class TestHRegion {
assertFalse("Region lock holder should not have been interrupted",
holderInterrupted.get());
}
+ @Test
+ public void testOversizedGetsReturnPartialResult() throws IOException {
+ HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+ Put p = new Put(row)
+ .addColumn(fam1, qual1, value1)
+ .addColumn(fam1, qual2, value2);
+
+ region.put(p);
+
+ Get get = new Get(row)
+ .addColumn(fam1, qual1)
+ .addColumn(fam1, qual2)
+ .setMaxResultSize(1); // 0 doesn't count as a limit, according to HBase
+
+ Result r = region.get(get);
+
+ assertTrue("Expected partial result, but result was not marked as
partial", r.mayHaveMoreCellsInRow());
+ }
+
+ @Test
+ public void testGetsWithoutResultSizeLimitAreNotPartial() throws IOException {
+ HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+ Put p = new Put(row)
+ .addColumn(fam1, qual1, value1)
+ .addColumn(fam1, qual2, value2);
+
+ region.put(p);
+
+ Get get = new Get(row)
+ .addColumn(fam1, qual1)
+ .addColumn(fam1, qual2);
+
+ Result r = region.get(get);
+
+ assertFalse("Expected full result, but it was marked as partial",
r.mayHaveMoreCellsInRow());
+ assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+ assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2)));
+ }
+
+ @Test
+ public void testGetsWithinResultSizeLimitAreNotPartial() throws IOException {
+ HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+ Put p = new Put(row)
+ .addColumn(fam1, qual1, value1)
+ .addColumn(fam1, qual2, value2);
+
+ region.put(p);
+
+ Get get = new Get(row)
+ .addColumn(fam1, qual1)
+ .addColumn(fam1, qual2)
+ .setMaxResultSize(Long.MAX_VALUE);
+
+ Result r = region.get(get);
+
+ assertFalse("Expected full result, but it was marked as partial",
r.mayHaveMoreCellsInRow());
+ assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+ assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2)));
+ }
+
+ @Test
+ public void testGetsWithResultSizeLimitReturnPartialResults() throws IOException {
+ HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+ Put p = new Put(row)
+ .addColumn(fam1, qual1, value1)
+ .addColumn(fam1, qual2, value2);
+
+ region.put(p);
+
+ Get get = new Get(row)
+ .addColumn(fam1, qual1)
+ .addColumn(fam1, qual2)
+ .setMaxResultSize(10);
+
+ Result r = region.get(get);
+
+ assertTrue("Expected partial result, but it was marked as complete",
r.mayHaveMoreCellsInRow());
+ assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+ assertEquals("Got more results than expected", 1, r.size());
+ }
+
}