This is an automated email from the ASF dual-hosted git repository.
yihua pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
from e5c3ac1bfcf2 docs: Claim RFC-81: Introduce Primary Key Sorted Table (#14245)
add fd79a1682e7e fix(ingest): Repair affected logical timestamp milli tables (#14161)
No new revisions were added by this update.
Summary of changes:
.../java/org/apache/hudi/table/HoodieTable.java | 4 +-
.../hadoop/TestHoodieFileGroupReaderOnHive.java | 2 +-
.../hudi/testutils/ArrayWritableTestUtil.java | 7 +-
.../hudi/io/storage/HoodieSparkParquetReader.java | 40 +-
.../storage/row/HoodieRowParquetWriteSupport.java | 6 +-
.../org/apache/hudi/AvroConversionUtils.scala | 2 +-
.../scala/org/apache/hudi/HoodieSparkUtils.scala | 4 +-
.../SparkFileFormatInternalRowReaderContext.scala | 20 +-
.../datasources/SparkColumnarFileReader.scala | 5 +-
.../parquet/HoodieParquetFileFormatHelper.scala | 5 +-
.../parquet/HoodieParquetReadSupport.scala | 15 +-
.../hudi/MultipleColumnarFileFormatReader.scala | 8 +-
.../keygen/TestTimestampBasedKeyGenerator.java | 4 +-
.../org/apache/hudi/avro/AvroRecordContext.java | 2 +-
.../AvroSchemaComparatorForRecordProjection.java | 4 +-
.../java/org/apache/hudi/avro/AvroSchemaUtils.java | 18 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 16 +-
.../hudi/common/engine/HoodieReaderContext.java | 9 +
.../hudi/common/table/PartitionPathParser.java | 4 +-
.../table/log/block/HoodieAvroDataBlock.java | 29 +-
.../convert/AvroInternalSchemaConverter.java | 4 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 48 +-
.../java/org/apache/hudi/stats/ValueMetadata.java | 4 +-
.../main/java/org/apache/hudi/stats/ValueType.java | 2 +-
.../apache/parquet/schema/AvroSchemaRepair.java | 238 +++++
.../org/apache/hudi/avro/TestHoodieAvroUtils.java | 6 +-
.../common/testutils/HoodieTestDataGenerator.java | 2 +-
.../hudi/metadata/TestHoodieTableMetadataUtil.java | 159 +++-
.../parquet/schema/TestAvroSchemaRepair.java | 983 +++++++++++++++++++++
.../hudi/io/hadoop/HoodieAvroParquetReader.java | 13 +-
.../avro/HoodieAvroParquetReaderBuilder.java | 10 +-
.../apache/parquet/avro/HoodieAvroReadSupport.java | 13 +-
.../org/apache/parquet/schema/SchemaRepair.java | 162 ++++
.../parquet/avro/TestAvroSchemaConverter.java | 954 ++++++++++++++++++++
.../apache/parquet/schema/TestSchemaRepair.java | 600 +++++++++++++
.../schema/TestSchemaRepairEquivalence.java | 481 ++++++++++
.../src/test/resources/parquet-java/all.avsc | 110 +++
.../parquet-java/allFromParquetNewBehavior.avsc | 108 +++
.../parquet-java/allFromParquetOldBehavior.avsc | 117 +++
.../test/resources/parquet-java/fixedToInt96.avsc | 97 ++
.../hadoop/hive/serde2/avro/HiveTypeUtils.java | 2 +-
.../hudi/hadoop/HiveHoodieReaderContext.java | 7 +-
.../HoodieFileGroupReaderBasedRecordReader.java | 3 +-
.../hudi/hadoop/HoodieParquetInputFormat.java | 8 +-
.../hudi/hadoop/avro/HoodieAvroParquetReader.java | 51 +-
.../HoodieTimestampAwareParquetInputFormat.java | 7 +-
.../hudi/hadoop/utils/HiveAvroSerializer.java | 10 +-
.../hadoop/utils/HoodieArrayWritableAvroUtils.java | 6 +-
.../org/apache/hudi/ColumnStatsIndexSupport.scala | 92 +-
.../org/apache/hudi/ExpressionIndexSupport.scala | 6 +-
.../scala/org/apache/hudi/HoodieFileIndex.scala | 4 +-
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 4 +-
.../apache/hudi/PartitionStatsIndexSupport.scala | 23 +-
.../org/apache/hudi/SparkBaseIndexSupport.scala | 14 +-
.../apache/hudi/SparkHoodieTableFileIndex.scala | 17 +-
.../org/apache/hudi/cdc/CDCFileGroupIterator.scala | 11 +-
.../datasources/orc/SparkOrcReaderBase.scala | 2 +-
.../HoodieFileGroupReaderBasedFileFormat.scala | 35 +-
.../parquet/SparkParquetReaderBase.scala | 11 +-
.../hudi/command/payload/ExpressionPayload.scala | 10 +-
.../ShowColumnStatsOverlapProcedure.scala | 19 +-
.../ShowMetadataTableColumnStatsProcedure.scala | 7 +-
.../functional/TestSparkSortAndSizeClustering.java | 4 +-
.../trips_logical_types_json_cow_read_v6.zip | Bin 0 -> 103597 bytes
.../trips_logical_types_json_cow_read_v8.zip | Bin 0 -> 139172 bytes
.../trips_logical_types_json_cow_read_v9.zip | Bin 0 -> 200295 bytes
.../trips_logical_types_json_mor_read_v6.zip | Bin 0 -> 48290 bytes
..._logical_types_json_mor_read_v6_parquet_log.zip | Bin 0 -> 55640 bytes
.../trips_logical_types_json_mor_read_v8.zip | Bin 0 -> 68870 bytes
..._logical_types_json_mor_read_v8_parquet_log.zip | Bin 0 -> 76217 bytes
.../trips_logical_types_json_mor_read_v9.zip | Bin 0 -> 68838 bytes
..._logical_types_json_mor_read_v9_parquet_log.zip | Bin 0 -> 76199 bytes
.../hudi/TestAvroSchemaResolutionSupport.scala | 8 +-
.../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 1 +
.../hudi/functional/ColumnStatIndexTestBase.scala | 6 +-
.../functional/PartitionStatsIndexTestBase.scala | 2 +
.../apache/hudi/functional/TestCOWDataSource.scala | 75 +-
.../hudi/functional/TestColumnStatsIndex.scala | 17 +-
.../functional/TestColumnStatsIndexWithSQL.scala | 5 +-
.../apache/hudi/functional/TestMORDataSource.scala | 80 +-
.../hudi/functional/TestPartitionStatsIndex.scala | 16 +-
.../Spark33LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark33ParquetReader.scala | 43 +-
.../Spark34LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark34ParquetReader.scala | 41 +-
.../Spark35LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark35ParquetReader.scala | 41 +-
.../Spark40LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark40ParquetReader.scala | 40 +-
.../utilities/HoodieMetadataTableValidator.java | 5 +-
.../deltastreamer/TestHoodieDeltaStreamer.java | 333 +++++++
.../src/test/resources/logical-repair/README.md | 88 ++
.../logical-repair/cow_write_updates/2/data.json | 6 +
.../logical-repair/cow_write_updates/3/data.json | 3 +
.../logical-repair/mor_write_updates/5/data.json | 3 +
.../src/test/resources/logical-repair/schema.avsc | 112 +++
.../trips_logical_types_json_cow_write.zip | Bin 0 -> 61182 bytes
...trips_logical_types_json_mor_write_avro_log.zip | Bin 0 -> 54841 bytes
...ps_logical_types_json_mor_write_parquet_log.zip | Bin 0 -> 59817 bytes
99 files changed, 5326 insertions(+), 271 deletions(-)
create mode 100644
hudi-common/src/main/java/org/apache/parquet/schema/AvroSchemaRepair.java
create mode 100644
hudi-common/src/test/java/org/apache/parquet/schema/TestAvroSchemaRepair.java
create mode 100644
hudi-hadoop-common/src/main/java/org/apache/parquet/schema/SchemaRepair.java
create mode 100644
hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
create mode 100644
hudi-hadoop-common/src/test/java/org/apache/parquet/schema/TestSchemaRepair.java
create mode 100644
hudi-hadoop-common/src/test/java/org/apache/parquet/schema/TestSchemaRepairEquivalence.java
create mode 100644 hudi-hadoop-common/src/test/resources/parquet-java/all.avsc
create mode 100644
hudi-hadoop-common/src/test/resources/parquet-java/allFromParquetNewBehavior.avsc
create mode 100644
hudi-hadoop-common/src/test/resources/parquet-java/allFromParquetOldBehavior.avsc
create mode 100644
hudi-hadoop-common/src/test/resources/parquet-java/fixedToInt96.avsc
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v6.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v8.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v9.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v6.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v6_parquet_log.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v8.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v8_parquet_log.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v9.zip
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v9_parquet_log.zip
create mode 100644 hudi-utilities/src/test/resources/logical-repair/README.md
create mode 100644
hudi-utilities/src/test/resources/logical-repair/cow_write_updates/2/data.json
create mode 100644
hudi-utilities/src/test/resources/logical-repair/cow_write_updates/3/data.json
create mode 100644
hudi-utilities/src/test/resources/logical-repair/mor_write_updates/5/data.json
create mode 100644 hudi-utilities/src/test/resources/logical-repair/schema.avsc
create mode 100644
hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_cow_write.zip
create mode 100644
hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_mor_write_avro_log.zip
create mode 100644
hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_mor_write_parquet_log.zip