jbewing commented on code in PR #5373:
URL: https://github.com/apache/hbase/pull/5373#discussion_r1385605592
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java:
##########
@@ -486,52 +500,171 @@ public boolean shouldUseScanner(Scan scan, HStore store,
long oldestUnexpiredTS)
@Override
public boolean seekToPreviousRow(Cell originalKey) throws IOException {
try {
- try {
- boolean keepSeeking = false;
- Cell key = originalKey;
- do {
- Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
- if (seekCount != null) seekCount.increment();
- if (!hfs.seekBefore(seekKey)) {
- this.cur = null;
- return false;
- }
- Cell curCell = hfs.getCell();
- Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(curCell);
-
- if (seekCount != null) seekCount.increment();
- if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
- this.cur = null;
- return false;
- }
-
- setCurrentCell(hfs.getCell());
- this.stopSkippingKVsIfNextRow = true;
- boolean resultOfSkipKVs;
- try {
- resultOfSkipKVs = skipKVsNewerThanReadpoint();
- } finally {
- this.stopSkippingKVsIfNextRow = false;
- }
- if (!resultOfSkipKVs || getComparator().compareRows(cur,
firstKeyOfPreviousRow) > 0) {
- keepSeeking = true;
- key = firstKeyOfPreviousRow;
- continue;
- } else {
- keepSeeking = false;
- }
- } while (keepSeeking);
- return true;
- } finally {
- realSeekDone = true;
+ if (isFastSeekingEncoding) {
+ return seekToPreviousRowStateless(originalKey);
+ } else if (previousRow == null ||
getComparator().compareRows(previousRow, originalKey) > 0) {
+ return seekToPreviousRowWithoutHint(originalKey);
+ } else {
+ return seekToPreviousRowWithHint(originalKey);
}
} catch (FileNotFoundException e) {
throw e;
} catch (IOException ioe) {
throw new IOException("Could not seekToPreviousRow " + this + " to key "
+ originalKey, ioe);
+ } finally {
+ this.realSeekDone = true;
}
}
+ private boolean seekToPreviousRowWithHint(Cell originalKey) throws
IOException {
+ do {
+ if (previousRow == null) {
+ return seekToPreviousRowWithoutHint(originalKey);
+ }
+
+ Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(previousRow);
+ if (!seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (
+ setReadpointAndSkipNewerKvs()
+ && getComparator().compareRows(cur, firstKeyOfPreviousRow) <= 0
+ ) {
+ return true;
+ }
+ } while (true);
+ }
+
+ private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws
IOException {
+ boolean keepSeeking;
+ Cell key = originalKey;
+ do {
+ // Rewind to the cell before the beginning of this row
+ Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
+ if (!seekBefore(keyAtBeginningOfRow)) {
+ return false;
+ }
+
+ // Rewind before this row and save what we find as a seek hint
+ Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(hfs.getCell());
+ if (!seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ // Seek back to the start of the previous row
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (
+ !setReadpointAndSkipNewerKvs()
+ || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0
+ ) {
+ keepSeeking = true;
+ key = firstKeyOfPreviousRow;
+ } else {
+ keepSeeking = false;
+ }
+ } while (keepSeeking);
+ return true;
+ }
+
+ /**
+ * This seekToPreviousRow method requires two seeks from the beginning of a
block. It should be
+ * used if the cost for seeking to the beginning of a block is low.
+ */
+ private boolean seekToPreviousRowStateless(Cell originalKey) throws
IOException {
+ boolean keepSeeking;
+ Cell key = originalKey;
+ do {
+ Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
+ if (!seekBefore(keyAtBeginningOfRow)) {
+ return false;
+ }
+
+ Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(hfs.getCell());
+ if (!seekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (
+ !setReadpointAndSkipNewerKvs()
+ || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0
+ ) {
+ keepSeeking = true;
+ key = firstKeyOfPreviousRow;
+ } else {
+ keepSeeking = false;
+ }
+ } while (keepSeeking);
+ return true;
+ }
+
+ private boolean seekBefore(Cell seekKey) throws IOException {
+ if (seekCount != null) seekCount.increment();
+ if (!hfs.seekBefore(seekKey)) {
+ this.cur = null;
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean seekBeforeAndSaveKeyToPreviousRow(Cell seekKey) throws
IOException {
+ if (seekCount != null) seekCount.increment();
+ if (!hfs.seekBefore(seekKey)) {
+ // Since the above seek failed, we need to position ourselves back at
the start of the
+ // block or else our reseek might fail
+ if (!hfs.seekTo()) {
Review Comment:
> im not sure this fallback is necessary. according to javadoc, seekBefore
should only return false if seekKey is <= the first key in the storefile. In
that case, it's unlikely for the seekTo here to return false -- it only returns
false if the file is empty, but the file can't be empty if we got here (we
always will have succeeded at doing at least one seekBefore before calling this
method).
Nice catch, I'll make the change to go from
```java
if (!hfs.seekTo()) {
this.cur = null;
return false;
}
```
to just
```java
hfs.seekTo();
```
here.
> Is there a valid case where the seekKey is <= start of file, and it makes
sense to reset to the start of the file? What would we seek to? I think we'd
end up re-calling this method again with the same seekKey.
Yeah, this is actually a perfectly normal case, and it is the one that the unit
test
[testReversibleScanners#testReversibleStoreFileScanner](https://github.com/apache/hbase/blob/7af61794a8eee25d9e48a2561dd0a6a321f48c0c/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestReversibleScanners.java#L109)
revealed for me (or at least that's the failing test which triggered
these changes). The test fails if you remove the `seekTo` here.
Consider two storefiles A & B which contain the rows {0, 3} & {1, 4}
respectively. After we call seekToPreviousRow on these, they'll be in the
states: {current=3, previousRow=0} and {current=4, previousRow=1} respectively.
On the call to `seekToPreviousRow(Cell of Row 3)` for storefile B, we will go
down the `seekToPreviousRowWithHint` path. The scanner for this storefile is
currently at row 4. When we call
`seekBeforeAndSaveKeyToPreviousRow(createFirstOnRow(Cell of Row 1))`,
`hfs.seekBefore` will return false and we'll set previousRow to `null`. The
scanner position will not have moved from row 4 so when we go to
`reseekAtOrAfter(createFirstOnRow(Cell of Row 1))`, it will return false. This
means that we'll effectively entirely skip row 1. I suppose we could
`seekAtOrAfter` in this method back to the correct place, but we'll still do an
additional reseek to the same place once we return up to
`seekToPreviousRowWithHint` so it seems easier to just reset to the beginning
of the block and let the `reseekAtOrAfter` take us to the desired position.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]