deniskuzZ commented on code in PR #5549: URL: https://github.com/apache/hive/pull/5549#discussion_r2036859817
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveDeleteFilter.java: ########## @@ -85,105 +86,98 @@ protected InputFile getInputFile(String location) { */ public CloseableIterable<HiveBatchContext> filterBatch(CloseableIterable<HiveBatchContext> batches) { + CloseableIterator<HiveBatchContext> iterator = new DeleteFilterBatchIterator(batches); + + return new CloseableIterable<HiveBatchContext>() { + + @Override + public CloseableIterator<HiveBatchContext> iterator() { + return iterator; + } + + @Override + public void close() throws IOException { + iterator.close(); + } + }; + } + + // VRB iterator with the delete filter + private class DeleteFilterBatchIterator implements CloseableIterator<HiveBatchContext> { + // Delete filter pipeline setup logic: // A HiveRow iterable (deleteInputIterable) is provided as input iterable for the DeleteFilter. // The content in deleteInputIterable is provided by row iterators from the incoming VRBs i.e. on the arrival of // a new batch the underlying iterator gets swapped. - SwappableHiveRowIterable deleteInputIterable = new SwappableHiveRowIterable(); + private final SwappableHiveRowIterable deleteInputIterable; // Output iterable of DeleteFilter, and its iterator - CloseableIterable<HiveRow> deleteOutputIterable = filter(deleteInputIterable); - CloseableIterator<HiveRow> deleteOutputIterator = deleteOutputIterable.iterator(); + private final CloseableIterable<HiveRow> deleteOutputIterable; - return new CloseableIterable<HiveBatchContext>() { + private final CloseableIterator<HiveBatchContext> srcIterator; - @Override - public CloseableIterator<HiveBatchContext> iterator() { + DeleteFilterBatchIterator(CloseableIterable<HiveBatchContext> batches) { + deleteInputIterable = new SwappableHiveRowIterable(); + deleteOutputIterable = filter(deleteInputIterable); + srcIterator = batches.iterator(); + } - CloseableIterator<HiveBatchContext> srcIterator = batches.iterator(); + @Override + public boolean hasNext() { + return srcIterator.hasNext(); + } - return new CloseableIterator<HiveBatchContext>() { + @Override + public HiveBatchContext next() { + try { + if (!hasNext()) { + throw new NoSuchElementException(); + } + HiveBatchContext batchContext = srcIterator.next(); + VectorizedRowBatch batch = batchContext.getBatch(); - @Override - public boolean hasNext() { - return srcIterator.hasNext(); - } + int oldSize = batch.size; + int newSize = 0; - @Override - public HiveBatchContext next() { - try { - if (!hasNext()) { - throw new NoSuchElementException(); - } - HiveBatchContext currentBatchContext = srcIterator.next(); - deleteInputIterable.currentRowIterator = currentBatchContext.rowIterator(); - VectorizedRowBatch batch = currentBatchContext.getBatch(); - - int oldSize = batch.size; - int newSize = 0; - - // Apply delete filtering and adjust the selected array so that undeleted row indices are filled with it. - while (deleteOutputIterator.hasNext()) { - HiveRow row = deleteOutputIterator.next(); - if (!row.isDeleted()) { - batch.selected[newSize++] = row.physicalBatchIndex(); - } - } - - if (newSize < oldSize) { - batch.size = newSize; - batch.selectedInUse = true; - } - return currentBatchContext; - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } + try (CloseableIterator<HiveRow> rowIterator = batchContext.rowIterator()) { + deleteInputIterable.currentRowIterator = rowIterator; - @Override - public void close() throws IOException { - srcIterator.close(); + // Apply delete filtering and adjust the selected array so that undeleted row indices are filled with it. + for (HiveRow row : deleteOutputIterable) { + if (!row.isDeleted()) { + batch.selected[newSize++] = row.physicalBatchIndex(); + } } - }; + } + if (newSize < oldSize) { + batch.size = newSize; + batch.selectedInUse = true; + } + return batchContext; + } catch (IOException e) { + throw new UncheckedIOException(e); } + } - @Override - public void close() throws IOException { - batches.close(); - } - }; + @Override + public void close() throws IOException { + IOUtils.close(srcIterator, deleteOutputIterable); + } } // HiveRow iterable that wraps an interchangeable source HiveRow iterable - static class SwappableHiveRowIterable implements CloseableIterable<HiveRow> { + static final class SwappableHiveRowIterable implements CloseableIterable<HiveRow> { Review Comment: i think if was a practice to mark `private static` nested classes as final, even sonar recommended that as perf optimization for compiler, however, I wasn't able to find any article on that. Adding just private modifier -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org