danielxjd commented on a change in pull request #12786:
URL: https://github.com/apache/beam/pull/12786#discussion_r485262811
########## File path: sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java ##########

@@ -336,36 +388,41 @@ public void processElement(
               + tracker.currentRestriction().getFrom()
               + " to "
               + tracker.currentRestriction().getTo());
-      ParquetReadOptions options = HadoopReadOptions.builder(getConfWithModelClass()).build();
-      ParquetFileReader reader =
-          ParquetFileReader.open(new BeamParquetInputFile(file.openSeekable()), options);
+      Configuration conf = getConfWithModelClass();
       GenericData model = null;
       if (modelClass != null) {
         model = (GenericData) modelClass.getMethod("get").invoke(null);
       }
-      ReadSupport<GenericRecord> readSupport = new AvroReadSupport<GenericRecord>(model);
-
+      AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<GenericRecord>(model);
+      if (requestSchemaString != null) {
+        AvroReadSupport.setRequestedProjection(
+            conf, new Schema.Parser().parse(requestSchemaString));
+      }
+      ParquetReadOptions options = HadoopReadOptions.builder(conf).build();
+      ParquetFileReader reader =
+          ParquetFileReader.open(new BeamParquetInputFile(file.openSeekable()), options);
       Filter filter = checkNotNull(options.getRecordFilter(), "filter");
       Configuration hadoopConf = ((HadoopReadOptions) options).getConf();
+      for (String property : options.getPropertyNames()) {
+        hadoopConf.set(property, options.getProperty(property));
+      }
       FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData();
       MessageType fileSchema = parquetFileMetadata.getSchema();
       Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
-
       ReadSupport.ReadContext readContext =
           readSupport.init(
               new InitContext(
                   hadoopConf, Maps.transformValues(fileMetadata, ImmutableSet::of), fileSchema));
       ColumnIOFactory columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
-      MessageType requestedSchema = readContext.getRequestedSchema();
+
       RecordMaterializer<GenericRecord> recordConverter =
           readSupport.prepareForRead(hadoopConf, fileMetadata, fileSchema, readContext);
-      reader.setRequestedSchema(requestedSchema);

Review comment:
   Maybe I should keep this line (`reader.setRequestedSchema(requestedSchema);`).
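For readers following the change: the diff pushes an Avro "requested projection" into the Hadoop `Configuration` before the `ParquetFileReader` is opened, so the read support only materializes the projected columns. Below is a minimal, self-contained sketch of the same parquet-avro projection mechanism, using the public `AvroParquetReader` API instead of Beam's internal splittable reader; the file path and projection schema are made up for illustration:

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader;

public class ProjectionReadSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical projection schema: read only the "id" column of the file.
    String requestSchemaString =
        "{\"type\":\"record\",\"name\":\"Projected\","
            + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}";

    // Same call the PR makes: register the projection on the Configuration
    // before the reader is built, so only the projected columns are read.
    Configuration conf = new Configuration();
    AvroReadSupport.setRequestedProjection(
        conf, new Schema.Parser().parse(requestSchemaString));

    // Hypothetical input file; records come back with only the projected fields.
    try (ParquetReader<GenericRecord> reader =
        AvroParquetReader.<GenericRecord>builder(new Path("/tmp/data.parquet"))
            .withConf(conf)
            .build()) {
      for (GenericRecord record = reader.read(); record != null; record = reader.read()) {
        System.out.println(record.get("id"));
      }
    }
  }
}
```

On the review question itself: `ParquetFileReader.setRequestedSchema(...)` is what tells `readNextRowGroup()` which column chunks to load, so when a projection is in play the reader should be handed the projected `MessageType` from `readContext.getRequestedSchema()`; that appears to be why dropping the line is being reconsidered.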