xiarixiaoyao commented on a change in pull request #4253:
URL: https://github.com/apache/hudi/pull/4253#discussion_r767376414
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java
##########
@@ -46,13 +53,32 @@
public HoodieRowParquetWriteSupport(Configuration conf, StructType
structType, BloomFilter bloomFilter, HoodieWriteConfig writeConfig) {
super();
Configuration hadoopConf = new Configuration(conf);
- hadoopConf.set("spark.sql.parquet.writeLegacyFormat",
writeConfig.parquetWriteLegacyFormatEnabled());
+ hadoopConf.set("spark.sql.parquet.writeLegacyFormat",
findSmallPrecisionDecimalType(structType) ? "true" :
writeConfig.parquetWriteLegacyFormatEnabled());
Review comment:
good suggestion, fixed
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java
##########
@@ -46,13 +53,32 @@
public HoodieRowParquetWriteSupport(Configuration conf, StructType
structType, BloomFilter bloomFilter, HoodieWriteConfig writeConfig) {
super();
Configuration hadoopConf = new Configuration(conf);
- hadoopConf.set("spark.sql.parquet.writeLegacyFormat",
writeConfig.parquetWriteLegacyFormatEnabled());
+ hadoopConf.set("spark.sql.parquet.writeLegacyFormat",
findSmallPrecisionDecimalType(structType) ? "true" :
writeConfig.parquetWriteLegacyFormatEnabled());
hadoopConf.set("spark.sql.parquet.outputTimestampType",
writeConfig.parquetOutputTimestampType());
this.hadoopConf = hadoopConf;
setSchema(structType, hadoopConf);
this.bloomFilter = bloomFilter;
}
+ // By default, ParquetWriteSupport writes a DecimalType to parquet as
int32/int64 when the precision of the DecimalType < Decimal.MAX_LONG_DIGITS(),
+ // but AvroParquetReader, which is used by HoodieParquetReader, cannot
read int32/int64 as DecimalType.
+ // Check whether the given sparkType contains such a DecimalType.
+ private boolean findSmallPrecisionDecimalType(DataType sparkType) {
Review comment:
fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]