shangxinli commented on code in PR #374:
URL: https://github.com/apache/iceberg-cpp/pull/374#discussion_r2616320209
##########
src/iceberg/avro/avro_reader.cc:
##########
@@ -238,17 +277,54 @@ class AvroReader::Impl {
return arrow_array;
}
+ // Helper: Check if past sync point
+ bool IsPastSync() const {
+ if (!split_end_) return false;
+ return use_direct_decoder_ ? base_reader_->pastSync(split_end_.value())
+ : datum_reader_->pastSync(split_end_.value());
+ }
+
+ // Helper: Get metadata from appropriate reader
+ ::avro::Metadata GetReaderMetadata() const {
+ return use_direct_decoder_ ? base_reader_->metadata() : datum_reader_->metadata();
+ }
+
+ // Helper: Close the appropriate reader
+ void CloseReader() {
+ if (use_direct_decoder_) {
+ if (base_reader_) {
+ base_reader_->close();
+ base_reader_.reset();
+ }
+ } else {
+ if (datum_reader_) {
+ datum_reader_->close();
+ datum_reader_.reset();
+ }
+ }
+ }
+
+ // Helper: Get reader schema
+ const ::avro::ValidSchema& GetReaderSchema() const {
+ return use_direct_decoder_ ? base_reader_->readerSchema()
+ : datum_reader_->readerSchema();
+ }
+
private:
// Max number of rows in the record batch to read.
int64_t batch_size_{};
+ // Whether to use direct decoder (true) or GenericDatum-based decoder (false).
+ bool use_direct_decoder_{true};
// The end of the split to read and used to terminate the reading.
std::optional<int64_t> split_end_;
// The schema to read.
std::shared_ptr<::iceberg::Schema> read_schema_;
// The projection result to apply to the read schema.
SchemaProjection projection_;
- // The avro reader to read the data into a datum.
- std::unique_ptr<::avro::DataFileReader<::avro::GenericDatum>> reader_;
+ // The avro reader base - provides direct access to decoder (new path).
Review Comment:
Yeah, let's use the direct/GenericDatum naming here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]