This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch branch-0.x in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 695577bdc958c4edf7a81b306ea75ab0d3116c03 Author: wombatu-kun <[email protected]> AuthorDate: Thu Mar 7 12:31:56 2024 +0700 [HUDI-7356] Passing configs to file reader constructor for flexibility (#10698) Co-authored-by: Vova Kolmakov <[email protected]> --- .../io/storage/HoodieSparkFileReaderFactory.java | 9 +++++++-- .../hudi/io/storage/HoodieAvroFileReaderFactory.java | 13 +++++++++---- .../hudi/io/storage/HoodieFileReaderFactory.java | 20 ++++++++++---------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java index f981061ecc3..d06b6913905 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java @@ -18,6 +18,7 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; @@ -30,7 +31,8 @@ import java.io.IOException; public class HoodieSparkFileReaderFactory extends HoodieFileReaderFactory { - protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { + @Override + public HoodieFileReader newParquetFileReader(Configuration conf, Path path) { conf.setIfUnset(SQLConf.PARQUET_BINARY_AS_STRING().key(), SQLConf.PARQUET_BINARY_AS_STRING().defaultValueString()); conf.setIfUnset(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), SQLConf.PARQUET_INT96_AS_TIMESTAMP().defaultValueString()); conf.setIfUnset(SQLConf.CASE_SENSITIVE().key(), SQLConf.CASE_SENSITIVE().defaultValueString()); @@ -42,12 +44,15 @@ public class HoodieSparkFileReaderFactory extends HoodieFileReaderFactory { return new HoodieSparkParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(Configuration conf, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, + Configuration conf, Path path, Option<Schema> schemaOption) throws IOException { throw new HoodieIOException("Not support read HFile"); } + @Override protected HoodieFileReader newOrcFileReader(Configuration conf, Path path) { throw new HoodieIOException("Not support read orc file"); } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java index 0a511d10b03..84aed905a4d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java @@ -18,6 +18,7 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.util.Option; import org.apache.avro.Schema; @@ -29,15 +30,18 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig; import java.io.IOException; public class HoodieAvroFileReaderFactory extends HoodieFileReaderFactory { + + @Override protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { return new HoodieAvroParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, Option<Schema> schemaOption) throws IOException { - if (useNativeHFileReader) { + if (isUseNativeHFileReaderEnabled(hoodieConfig)) { return new HoodieNativeAvroHFileReader(conf, path, schemaOption); } CacheConfig cacheConfig = new CacheConfig(conf); @@ -47,14 +51,15 @@ public class HoodieAvroFileReaderFactory extends HoodieFileReaderFactory { return new HoodieHBaseAvroHFileReader(conf, path, cacheConfig); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + @Override + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, FileSystem fs, byte[] content, Option<Schema> schemaOption) throws IOException { - if (useNativeHFileReader) { + if (isUseNativeHFileReaderEnabled(hoodieConfig)) { return new HoodieNativeAvroHFileReader(conf, content, schemaOption); } CacheConfig cacheConfig = new CacheConfig(conf); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java index f4b4bedc468..ac2736f8829 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java @@ -84,11 +84,9 @@ public class HoodieFileReaderFactory { Option<Schema> schemaOption) throws IOException { switch (format) { case PARQUET: - return this.newParquetFileReader(conf, path); + return newParquetFileReader(conf, path); case HFILE: - boolean useNativeHFileReader = - hoodieConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); - return newHFileFileReader(useNativeHFileReader, conf, path, schemaOption); + return newHFileFileReader(hoodieConfig, conf, path, schemaOption); case ORC: return newOrcFileReader(conf, path); default: @@ -96,15 +94,13 @@ public class HoodieFileReaderFactory { } } - public HoodieFileReader getContentReader(HoodieConfig config, + public HoodieFileReader getContentReader(HoodieConfig hoodieConfig, Configuration conf, Path path, HoodieFileFormat format, FileSystem fs, byte[] content, Option<Schema> schemaOption) throws IOException { switch (format) { case HFILE: - boolean useNativeHFileReader = - config.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); - return newHFileFileReader(useNativeHFileReader, conf, path, fs, content, schemaOption); + return newHFileFileReader(hoodieConfig, conf, path, fs, content, schemaOption); default: throw new UnsupportedOperationException(format + " format not supported yet."); } @@ -114,13 +110,13 @@ public class HoodieFileReaderFactory { throw new UnsupportedOperationException(); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, Option<Schema> schemaOption) throws IOException { throw new UnsupportedOperationException(); } - protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, FileSystem fs, byte[] content, Option<Schema> schemaOption) @@ -138,4 +134,8 @@ public class HoodieFileReaderFactory { Object[] partitionValues) { throw new UnsupportedOperationException(); } + + protected static boolean isUseNativeHFileReaderEnabled(HoodieConfig hoodieConfig) { + return hoodieConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); + } }
