bbeaudreault commented on code in PR #5373:
URL: https://github.com/apache/hbase/pull/5373#discussion_r1387001876
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java:
##########
@@ -486,52 +500,184 @@ public boolean shouldUseScanner(Scan scan, HStore store,
long oldestUnexpiredTS)
@Override
public boolean seekToPreviousRow(Cell originalKey) throws IOException {
try {
- try {
- boolean keepSeeking = false;
- Cell key = originalKey;
- do {
- Cell seekKey = PrivateCellUtil.createFirstOnRow(key);
- if (seekCount != null) seekCount.increment();
- if (!hfs.seekBefore(seekKey)) {
- this.cur = null;
- return false;
- }
- Cell curCell = hfs.getCell();
- Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(curCell);
-
- if (seekCount != null) seekCount.increment();
- if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
- this.cur = null;
- return false;
- }
-
- setCurrentCell(hfs.getCell());
- this.stopSkippingKVsIfNextRow = true;
- boolean resultOfSkipKVs;
- try {
- resultOfSkipKVs = skipKVsNewerThanReadpoint();
- } finally {
- this.stopSkippingKVsIfNextRow = false;
- }
- if (!resultOfSkipKVs || getComparator().compareRows(cur,
firstKeyOfPreviousRow) > 0) {
- keepSeeking = true;
- key = firstKeyOfPreviousRow;
- continue;
- } else {
- keepSeeking = false;
- }
- } while (keepSeeking);
- return true;
- } finally {
- realSeekDone = true;
+ if (isFastSeekingEncoding) {
+ return seekToPreviousRowStateless(originalKey);
+ } else if (previousRow == null ||
getComparator().compareRows(previousRow, originalKey) > 0) {
+ return seekToPreviousRowWithoutHint(originalKey);
+ } else {
+ return seekToPreviousRowWithHint();
}
} catch (FileNotFoundException e) {
throw e;
} catch (IOException ioe) {
throw new IOException("Could not seekToPreviousRow " + this + " to key "
+ originalKey, ioe);
+ } finally {
+ this.realSeekDone = true;
}
}
+ /**
+ * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)}
method requires one seek
+ * and one reseek. This method maintains state in {@link
StoreFileScanner#previousRow} which only
+ * makes sense in the context of a sequential row-by-row reverse scan.
+ * {@link StoreFileScanner#previousRow} should be reset if that is not the
case. The reasoning for
+ * why this method is faster than {@link
StoreFileScanner#seekToPreviousRowStateless(Cell)} is
+ * that seeks are slower as they need to start from the beginning of the
file, while reseeks go
+ * forward from the current position.
+ */
+ private boolean seekToPreviousRowWithHint() throws IOException {
+ do {
+ Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(previousRow);
+ if (!seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+ return true;
+ }
+
+ if (previousRow == null) {
+ return seekToPreviousRowWithoutHint(firstKeyOfPreviousRow);
+ }
+ } while (true);
+ }
+
+ /**
+ * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)}
method requires two seeks
+ * and one reseek. The extra expense/seek is with the intent of speeding up
subsequent calls by
+ * using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this
method seeds the state
+ * for by setting {@link StoreFileScanner#previousRow}
+ */
+ private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws
IOException {
+ Cell key = originalKey;
+ do {
+ // Rewind to the cell before the beginning of this row
+ Cell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(key);
+ if (!seekBefore(keyAtBeginningOfRow)) {
+ return false;
+ }
+
+ // Rewind before this row and save what we find as a seek hint
+ Cell firstKeyOfPreviousRow =
PrivateCellUtil.createFirstOnRow(hfs.getCell());
+ if (!seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ // Seek back to the start of the previous row
+ if (!reseekAtOrAfter(firstKeyOfPreviousRow)) {
+ return false;
+ }
+
+ if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) {
+ return true;
+ }
+
+ if (previousRow != null) {
Review Comment:
Can you add a comment here too? This code is complicated despite looking
simple :)
My current understanding is: above, we found previousRow, then seeked forward
to the row we actually wanted. Then we skipped any too-new KVs, which happened
to be the whole row. So now we want to continue backwards, and we can
conveniently use the previously stashed previousRow for that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]