boyuanzz commented on a change in pull request #12223: URL: https://github.com/apache/beam/pull/12223#discussion_r466020446
########## File path: sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java ########## @@ -154,10 +187,15 @@ public static ReadFiles readFiles(Schema schema) { abstract @Nullable GenericData getAvroDataModel(); + abstract boolean getSplit(); Review comment: Rename to `isSplittable`? ########## File path: sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java ########## @@ -187,12 +229,18 @@ public Read withAvroDataModel(GenericData model) { @Override public PCollection<GenericRecord> expand(PBegin input) { checkNotNull(getFilepattern(), "Filepattern cannot be null."); - - return input - .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) - .apply(FileIO.matchAll()) - .apply(FileIO.readMatches()) - .apply(readFiles(getSchema()).withAvroDataModel(getAvroDataModel())); + PCollection<FileIO.ReadableFile> inputFiles = + input + .apply( + "Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) + .apply(FileIO.matchAll()) + .apply(FileIO.readMatches()); + if (!getSplit()) { Review comment: Should it be `if (getSplit())`?
########## File path: sdks/java/io/parquet/src/main/java/org/apache/beam/sdk/io/parquet/ParquetIO.java ########## @@ -187,12 +229,18 @@ public Read withAvroDataModel(GenericData model) { @Override public PCollection<GenericRecord> expand(PBegin input) { checkNotNull(getFilepattern(), "Filepattern cannot be null."); - - return input - .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) - .apply(FileIO.matchAll()) - .apply(FileIO.readMatches()) - .apply(readFiles(getSchema()).withAvroDataModel(getAvroDataModel())); + PCollection<FileIO.ReadableFile> inputFiles = + input + .apply( + "Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) + .apply(FileIO.matchAll()) + .apply(FileIO.readMatches()); + if (!getSplit()) { + return inputFiles.apply( + readFiles(getSchema()).withSplit().withAvroDataModel(getAvroDataModel())); + } else { Review comment: We can drop this redundant `else`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org