pavibhai commented on a change in pull request #635:
URL: https://github.com/apache/orc/pull/635#discussion_r578814097
##########
File path: java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
##########
@@ -1221,52 +1244,112 @@ private boolean advanceToNextRow(
@Override
public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
try {
- if (rowInStripe >= rowCountInStripe) {
- currentStripe += 1;
- if (currentStripe >= stripes.size()) {
- batch.size = 0;
- return false;
+ int batchSize;
+
+ // do...while is required to handle the case where the filter eliminates all rows in the
+ // batch
+ do {
+ if (rowInStripe >= rowCountInStripe) {
+ currentStripe += 1;
+ if (currentStripe >= stripes.size()) {
+ batch.size = 0;
+ return false;
+ }
+ // Read stripe in Memory
+ readStripe();
+ followRowInStripe = rowInStripe;
}
- // Read stripe in Memory
- readStripe();
- }
- int batchSize = computeBatchSize(batch.getMaxSize());
- rowInStripe += batchSize;
- reader.setVectorColumnCount(batch.getDataColumnCount());
- reader.nextBatch(batch, batchSize);
- advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
- // batch.size can be modified by filter so only batchSize can tell if we actually read rows
+ batchSize = computeBatchSize(batch.getMaxSize());
+ reader.setVectorColumnCount(batch.getDataColumnCount());
+ reader.nextBatch(batch, batchSize, readLevel);
+ if (readLevel == ReadLevel.LEAD && batch.size > 0) {
+ prepareFollowingStreams(rowInStripe, followRowInStripe);
Review comment:
added
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]