cdmikechen commented on code in PR #7173:
URL: https://github.com/apache/hudi/pull/7173#discussion_r1218771920
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java:
##########
@@ -52,12 +59,36 @@ public class HoodieParquetInputFormat extends
HoodieParquetInputFormatBase {
private static final Logger LOG =
LoggerFactory.getLogger(HoodieParquetInputFormat.class);
+ private boolean supportAvroRead = true;
+
public HoodieParquetInputFormat() {
super(new HoodieCopyOnWriteTableInputFormat());
+ initAvroInputFormat();
}
protected HoodieParquetInputFormat(HoodieCopyOnWriteTableInputFormat
delegate) {
super(delegate);
+ initAvroInputFormat();
+ }
+
+ /**
+ * Spark2 uses `parquet.hadoop.ParquetInputFormat` from
`com.twitter:parquet-hadoop-bundle`.
+ * So we need to distinguish between constructors that take
+ * `parquet.hadoop.ParquetInputFormat` and those that take
`org.apache.parquet.hadoop.ParquetInputFormat`.
+ * If we use `org.apache.parquet:parquet-hadoop`, we can use
`HudiAvroParquetInputFormat`
+ * in Hive or Spark3 to read timestamps with the correct type.
+ */
+ private void initAvroInputFormat() {
+ try {
+ Constructor[] constructors =
ParquetRecordReaderWrapper.class.getConstructors();
+ if (Arrays.stream(constructors)
+ .anyMatch(c -> c.getParameterCount() > 0 && c.getParameterTypes()[0]
+ .getName().equals(ParquetInputFormat.class.getName()))) {
+ supportAvroRead = true;
Review Comment:
In my experience, Hive2 and Spark3 share the same processing method and
constructor, but Hive3 differs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]