usberkeley commented on code in PR #11924:
URL: https://github.com/apache/hudi/pull/11924#discussion_r1754395243
##########
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java:
##########
@@ -224,6 +249,168 @@ public IndexedRecord next() {
}
}
+ /**
+ * StreamingRecordIterator is an iterator for reading records from a Hoodie
log block in a streaming manner.
+ * It reads data from a given input stream, decodes Avro records, and
supports schema promotion.
+ *
+ * This iterator ensures that the buffer has enough data for each record and
handles buffer management,
+ * including compaction and resizing when necessary.
+ */
+ private static class StreamingRecordIterator implements
ClosableIterator<IndexedRecord> {
+ private static final int RECORD_LENGTH_BYTES = 4;
+ // The minimum buffer size in bytes
+ private static final int MIN_BUFFER_SIZE = RECORD_LENGTH_BYTES;
+ private final SeekableDataInputStream inputStream;
+ private final GenericDatumReader<IndexedRecord> reader;
+ private final ThreadLocal<BinaryDecoder> decoderCache = new
ThreadLocal<>();
+ private Option<Schema> promotedSchema = Option.empty();
+ private int totalRecords = 0;
+ private int readRecords = 0;
+ private ByteBuffer buffer;
+
+ private StreamingRecordIterator(Schema readerSchema, Schema writerSchema,
SeekableDataInputStream inputStream,
+ HoodieLogBlockContentLocation contentLocation, int bufferSize) throws
IOException {
+ checkArgument(readerSchema != null, "Reader schema must not be null");
+ checkArgument(writerSchema != null, "Writer schema must not be null");
+ checkArgument(inputStream != null, "Input stream must not be null");
+ checkArgument(contentLocation != null, "Content location must not be
null");
Review Comment:
> We can eliminate these checks if we are sure these params could never be null.

Got it, thanks
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]