This is an automated email from the ASF dual-hosted git repository.
yihua pushed a change to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
from f11bef61e823 fix: Disable positional merging for spark version < 3.5 (#14241)
new cab988cc67e7 fix(ingest): Repair affected logical timestamp milli tables (#14161)
new bcb5eaacabfb fix: Update metadata table record level index config keys naming for standardization (#14244)
new 5e58dccdd963 Improve staging release scripts
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../org/apache/hudi/config/HoodieWriteConfig.java | 16 +-
.../metadata/HoodieBackedTableMetadataWriter.java | 14 +-
.../java/org/apache/hudi/table/HoodieTable.java | 4 +-
.../hadoop/TestHoodieFileGroupReaderOnHive.java | 2 +-
.../hudi/testutils/ArrayWritableTestUtil.java | 7 +-
.../hudi/io/storage/HoodieSparkParquetReader.java | 40 +-
.../storage/row/HoodieRowParquetWriteSupport.java | 6 +-
.../org/apache/hudi/AvroConversionUtils.scala | 2 +-
.../scala/org/apache/hudi/HoodieSparkUtils.scala | 4 +-
.../SparkFileFormatInternalRowReaderContext.scala | 20 +-
.../datasources/SparkColumnarFileReader.scala | 5 +-
.../parquet/HoodieParquetFileFormatHelper.scala | 5 +-
.../parquet/HoodieParquetReadSupport.scala | 15 +-
.../hudi/MultipleColumnarFileFormatReader.scala | 8 +-
.../keygen/TestTimestampBasedKeyGenerator.java | 4 +-
.../org/apache/hudi/avro/AvroRecordContext.java | 2 +-
.../AvroSchemaComparatorForRecordProjection.java | 4 +-
.../java/org/apache/hudi/avro/AvroSchemaUtils.java | 18 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 16 +-
.../hudi/common/config/HoodieMetadataConfig.java | 40 +-
.../hudi/common/engine/HoodieReaderContext.java | 9 +
.../hudi/common/table/PartitionPathParser.java | 4 +-
.../table/log/block/HoodieAvroDataBlock.java | 29 +-
.../convert/AvroInternalSchemaConverter.java | 4 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 48 +-
.../java/org/apache/hudi/stats/ValueMetadata.java | 4 +-
.../main/java/org/apache/hudi/stats/ValueType.java | 2 +-
.../apache/parquet/schema/AvroSchemaRepair.java | 238 +++++
.../org/apache/hudi/avro/TestHoodieAvroUtils.java | 6 +-
.../common/testutils/HoodieTestDataGenerator.java | 2 +-
.../hudi/metadata/TestHoodieTableMetadataUtil.java | 159 +++-
.../parquet/schema/TestAvroSchemaRepair.java | 983 +++++++++++++++++++++
.../hudi/io/hadoop/HoodieAvroParquetReader.java | 13 +-
.../avro/HoodieAvroParquetReaderBuilder.java | 10 +-
.../apache/parquet/avro/HoodieAvroReadSupport.java | 13 +-
.../org/apache/parquet/schema/SchemaRepair.java | 162 ++++
.../parquet/avro/TestAvroSchemaConverter.java | 954 ++++++++++++++++++++
.../apache/parquet/schema/TestSchemaRepair.java | 600 +++++++++++++
.../schema/TestSchemaRepairEquivalence.java | 481 ++++++++++
.../src/test/resources/parquet-java/all.avsc | 110 +++
.../parquet-java/allFromParquetNewBehavior.avsc | 108 +++
.../parquet-java/allFromParquetOldBehavior.avsc | 117 +++
.../test/resources/parquet-java/fixedToInt96.avsc | 97 ++
.../hadoop/hive/serde2/avro/HiveTypeUtils.java | 2 +-
.../hudi/hadoop/HiveHoodieReaderContext.java | 7 +-
.../HoodieFileGroupReaderBasedRecordReader.java | 3 +-
.../hudi/hadoop/HoodieParquetInputFormat.java | 8 +-
.../hudi/hadoop/avro/HoodieAvroParquetReader.java | 51 +-
.../HoodieTimestampAwareParquetInputFormat.java | 7 +-
.../hudi/hadoop/utils/HiveAvroSerializer.java | 10 +-
.../hadoop/utils/HoodieArrayWritableAvroUtils.java | 6 +-
.../org/apache/hudi/ColumnStatsIndexSupport.scala | 92 +-
.../org/apache/hudi/ExpressionIndexSupport.scala | 6 +-
.../scala/org/apache/hudi/HoodieFileIndex.scala | 4 +-
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 4 +-
.../apache/hudi/PartitionStatsIndexSupport.scala | 23 +-
.../org/apache/hudi/SparkBaseIndexSupport.scala | 14 +-
.../apache/hudi/SparkHoodieTableFileIndex.scala | 17 +-
.../org/apache/hudi/cdc/CDCFileGroupIterator.scala | 11 +-
.../datasources/orc/SparkOrcReaderBase.scala | 2 +-
.../HoodieFileGroupReaderBasedFileFormat.scala | 35 +-
.../parquet/SparkParquetReaderBase.scala | 11 +-
.../hudi/command/payload/ExpressionPayload.scala | 10 +-
.../ShowColumnStatsOverlapProcedure.scala | 19 +-
.../ShowMetadataTableColumnStatsProcedure.scala | 7 +-
.../functional/TestSparkSortAndSizeClustering.java | 4 +-
.../trips_logical_types_json_cow_read_v6.zip | Bin 0 -> 103597 bytes
.../trips_logical_types_json_cow_read_v8.zip | Bin 0 -> 139172 bytes
.../trips_logical_types_json_cow_read_v9.zip | Bin 0 -> 200295 bytes
.../trips_logical_types_json_mor_read_v6.zip | Bin 0 -> 48290 bytes
..._logical_types_json_mor_read_v6_parquet_log.zip | Bin 0 -> 55640 bytes
.../trips_logical_types_json_mor_read_v8.zip | Bin 0 -> 68870 bytes
..._logical_types_json_mor_read_v8_parquet_log.zip | Bin 0 -> 76217 bytes
.../trips_logical_types_json_mor_read_v9.zip | Bin 0 -> 68838 bytes
..._logical_types_json_mor_read_v9_parquet_log.zip | Bin 0 -> 76199 bytes
.../hudi/TestAvroSchemaResolutionSupport.scala | 8 +-
.../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 1 +
.../hudi/functional/ColumnStatIndexTestBase.scala | 6 +-
.../functional/PartitionStatsIndexTestBase.scala | 2 +
.../hudi/functional/RecordLevelIndexTestBase.scala | 2 +-
.../apache/hudi/functional/TestCOWDataSource.scala | 75 +-
.../hudi/functional/TestColumnStatsIndex.scala | 17 +-
.../functional/TestColumnStatsIndexWithSQL.scala | 5 +-
.../functional/TestGlobalRecordLevelIndex.scala | 8 +-
.../apache/hudi/functional/TestMORDataSource.scala | 80 +-
.../hudi/functional/TestMetadataRecordIndex.scala | 2 +-
.../hudi/functional/TestPartitionStatsIndex.scala | 16 +-
.../TestHoodieBackedTableMetadataIndexLookup.scala | 8 +-
.../Spark33LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark33ParquetReader.scala | 43 +-
.../Spark34LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark34ParquetReader.scala | 41 +-
.../Spark35LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark35ParquetReader.scala | 41 +-
.../Spark40LegacyHoodieParquetFileFormat.scala | 1 +
.../datasources/parquet/Spark40ParquetReader.scala | 40 +-
.../utilities/HoodieMetadataTableValidator.java | 5 +-
.../TestHoodieMetadataTableValidator.java | 28 +-
.../deltastreamer/TestHoodieDeltaStreamer.java | 333 +++++++
.../src/test/resources/logical-repair/README.md | 88 ++
.../logical-repair/cow_write_updates/2/data.json | 6 +
.../logical-repair/cow_write_updates/3/data.json | 3 +
.../logical-repair/mor_write_updates/5/data.json | 3 +
.../src/test/resources/logical-repair/schema.avsc | 112 +++
.../trips_logical_types_json_cow_write.zip | Bin 0 -> 61182 bytes
...trips_logical_types_json_mor_write_avro_log.zip | Bin 0 -> 54841 bytes
...ps_logical_types_json_mor_write_parquet_log.zip | Bin 0 -> 59817 bytes
scripts/release/deploy_staging_jars.sh | 17 +-
...ars_java17.sh => deploy_staging_jars_java11.sh} | 7 +-
scripts/release/deploy_staging_jars_java17.sh | 2 +-
110 files changed, 5398 insertions(+), 343 deletions(-)
create mode 100644 hudi-common/src/main/java/org/apache/parquet/schema/AvroSchemaRepair.java
create mode 100644 hudi-common/src/test/java/org/apache/parquet/schema/TestAvroSchemaRepair.java
create mode 100644 hudi-hadoop-common/src/main/java/org/apache/parquet/schema/SchemaRepair.java
create mode 100644 hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
create mode 100644 hudi-hadoop-common/src/test/java/org/apache/parquet/schema/TestSchemaRepair.java
create mode 100644 hudi-hadoop-common/src/test/java/org/apache/parquet/schema/TestSchemaRepairEquivalence.java
create mode 100644 hudi-hadoop-common/src/test/resources/parquet-java/all.avsc
create mode 100644 hudi-hadoop-common/src/test/resources/parquet-java/allFromParquetNewBehavior.avsc
create mode 100644 hudi-hadoop-common/src/test/resources/parquet-java/allFromParquetOldBehavior.avsc
create mode 100644 hudi-hadoop-common/src/test/resources/parquet-java/fixedToInt96.avsc
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v6.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v8.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_cow_read_v9.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v6.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v6_parquet_log.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v8.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v8_parquet_log.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v9.zip
create mode 100644 hudi-spark-datasource/hudi-spark/src/test/resources/trips_logical_types_json_mor_read_v9_parquet_log.zip
create mode 100644 hudi-utilities/src/test/resources/logical-repair/README.md
create mode 100644 hudi-utilities/src/test/resources/logical-repair/cow_write_updates/2/data.json
create mode 100644 hudi-utilities/src/test/resources/logical-repair/cow_write_updates/3/data.json
create mode 100644 hudi-utilities/src/test/resources/logical-repair/mor_write_updates/5/data.json
create mode 100644 hudi-utilities/src/test/resources/logical-repair/schema.avsc
create mode 100644 hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_cow_write.zip
create mode 100644 hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_mor_write_avro_log.zip
create mode 100644 hudi-utilities/src/test/resources/logical-repair/trips_logical_types_json_mor_write_parquet_log.zip
copy scripts/release/{deploy_staging_jars_java17.sh => deploy_staging_jars_java11.sh} (89%)