prakharjain09 commented on a change in pull request #945:
URL: https://github.com/apache/parquet-mr/pull/945#discussion_r815008136
##########
File path:
parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
##########
@@ -265,4 +273,51 @@ public boolean nextKeyValue() throws IOException, InterruptedException {
return Collections.unmodifiableMap(setMultiMap);
}
+ /**
+ * Returns the ROW_INDEX of the current row.
+ */
+ public long getCurrentRowIndex() {
+ if (current == 0L) {
+ throw new RowIndexFetchedWithoutProcessingRowException("row index can be fetched only after processing a row");
+ }
+ if (rowIdxInFileItr == null) {
+ throw new RowIndexNotSupportedException("underlying page read store implementation" +
+ " doesn't support row index generation");
+ }
+ return currentRowIdx;
+ }
+
+ /**
+ * Resets the row index iterator based on the current processed row group.
+ */
+ private void resetRowIndexIterator(PageReadStore pages) {
+ Optional<Long> rowGroupRowIdxOffset = pages.getRowIndexOffset();
+ currentRowIdx = -1L;
+ if (rowGroupRowIdxOffset.isPresent()) {
+ final PrimitiveIterator.OfLong rowIdxInRowGroupItr;
+ if (pages.getRowIndexes().isPresent()) {
+ rowIdxInRowGroupItr = pages.getRowIndexes().get();
+ } else {
+ // If `pages.getRowIndexes()` is empty, this means column indexing has not triggered.
Review comment:
Removed this code comment.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]