yihua commented on code in PR #11208:
URL: https://github.com/apache/hudi/pull/11208#discussion_r1599190553
##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java:
##########
@@ -31,34 +31,37 @@
public class HoodieSparkFileReaderFactory extends HoodieFileReaderFactory {
+ public HoodieSparkFileReaderFactory(StorageConfiguration<?> storageConf) {
+ super(storageConf);
+ }
+
@Override
- public HoodieFileReader newParquetFileReader(StorageConfiguration<?> conf,
StoragePath path) {
- conf.setIfUnset(SQLConf.PARQUET_BINARY_AS_STRING().key(),
SQLConf.PARQUET_BINARY_AS_STRING().defaultValueString());
- conf.setIfUnset(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(),
SQLConf.PARQUET_INT96_AS_TIMESTAMP().defaultValueString());
- conf.setIfUnset(SQLConf.CASE_SENSITIVE().key(),
SQLConf.CASE_SENSITIVE().defaultValueString());
+ public HoodieFileReader newParquetFileReader(StoragePath path) {
+ storageConf.setIfUnset(SQLConf.PARQUET_BINARY_AS_STRING().key(),
SQLConf.PARQUET_BINARY_AS_STRING().defaultValueString());
+ storageConf.setIfUnset(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(),
SQLConf.PARQUET_INT96_AS_TIMESTAMP().defaultValueString());
+ storageConf.setIfUnset(SQLConf.CASE_SENSITIVE().key(),
SQLConf.CASE_SENSITIVE().defaultValueString());
// Using string value of this conf to preserve compatibility across spark
versions.
- conf.setIfUnset("spark.sql.legacy.parquet.nanosAsLong", "false");
+ storageConf.setIfUnset("spark.sql.legacy.parquet.nanosAsLong", "false");
// This is a required config since Spark 3.4.0:
SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED
// Using string value of this conf to preserve compatibility across spark
versions.
- conf.setIfUnset("spark.sql.parquet.inferTimestampNTZ.enabled", "true");
- return new HoodieSparkParquetReader(conf, path);
+ storageConf.setIfUnset("spark.sql.parquet.inferTimestampNTZ.enabled",
"true");
+ return new HoodieSparkParquetReader(storageConf, path);
}
@Override
protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig,
- StorageConfiguration<?> conf,
StoragePath path,
Option<Schema> schemaOption)
throws IOException {
throw new HoodieIOException("Not support read HFile");
}
@Override
- protected HoodieFileReader newOrcFileReader(StorageConfiguration<?> conf,
StoragePath path) {
+ protected HoodieFileReader newOrcFileReader(StoragePath path) {
throw new HoodieIOException("Not support read orc file");
}
@Override
public HoodieFileReader newBootstrapFileReader(HoodieFileReader
skeletonFileReader, HoodieFileReader dataFileReader, Option<String[]>
partitionFields, Object[] partitionValues) {
return new HoodieSparkBootstrapFileReader(skeletonFileReader,
dataFileReader, partitionFields, partitionValues);
}
-}
+}
Review Comment:
nit: keep the trailing newline at the end of the file.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java:
##########
@@ -470,9 +470,9 @@ public void performMergeDataValidationCheck(WriteStatus
writeStatus) {
}
long oldNumWrites = 0;
- try (HoodieFileReader reader =
HoodieIOFactory.getIOFactory(storage.getConf())
+ try (HoodieFileReader reader =
HoodieIOFactory.getIOFactory(hoodieTable.getStorageConf())
Review Comment:
Is there a difference between `storage.getConf()` and
`hoodieTable.getStorageConf()`? If not, we can probably remove the
`HoodieStorage` and `FileSystem` instances held by `HoodieIOHandle` (in a
separate PR).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]