wgtmac commented on code in PR #1114:
URL: https://github.com/apache/orc/pull/1114#discussion_r874292418
##########
c++/src/Reader.cc:
##########
@@ -391,27 +392,39 @@ namespace orc {
return;
}
- currentStripe = seekToStripe;
- currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
previousRow = rowNumber;
- startNextStripe();
+ auto rowIndexStride = footer->rowindexstride();
+ if (!isCurrentStripeInited()
+ || currentStripe != seekToStripe
+ || rowIndexStride == 0
+ || currentStripeInfo.indexlength() == 0) {
+ // current stripe is not initialized or
+ // target stripe is not current stripe or
+ // current stripe doesn't have row indexes
+ currentStripe = seekToStripe;
+ currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+ startNextStripe();
+ if (currentStripe >= lastStripe) {
+ return;
+ }
+ } else {
+ currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+ if (sargsApplier) {
+ // advance to selected row group if predicate pushdown is enabled
+ currentRowInStripe = advanceToNextRowGroup(currentRowInStripe,
+ rowsInCurrentStripe,
+ footer->rowindexstride(),
+
sargsApplier->getNextSkippedRows());
+ }
+ }
uint64_t rowsToSkip = currentRowInStripe;
- auto rowIndexStride = footer->rowindexstride();
// seek to the target row group if row indexes exists
if (rowIndexStride > 0 && currentStripeInfo.indexlength() > 0) {
- // when predicate push down is enabled, above call to startNextStripe()
- // will move current row to 1st matching row group; here we only need
- // to deal with the case when PPD is not enabled.
- if (!sargsApplier) {
- if (rowIndexes.empty()) {
- loadStripeIndex();
- }
- auto rowGroupId = static_cast<uint32_t>(rowsToSkip / rowIndexStride);
- if (rowGroupId != 0) {
- seekToRowGroup(rowGroupId);
- }
+ if (rowIndexes.empty()) {
+ loadStripeIndex();
}
+ seekToRowGroup(static_cast<uint32_t>(rowsToSkip / rowIndexStride));
Review Comment:
If somehow it fails to parse the row index or finds some missing columns in
the call of loadStripeIndex(), the seekToRowGroup here cannot proceed. Maybe
add a TODO here to mark a future improvement.
##########
c++/src/Reader.hh:
##########
@@ -146,6 +146,7 @@ namespace orc {
uint64_t firstStripe;
uint64_t currentStripe;
uint64_t lastStripe; // the stripe AFTER the last one
+ uint64_t currentInitedStripe;
Review Comment:
The naming is a little bit casual, maybe processingStripe?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]