This is an automated email from the ASF dual-hosted git repository.
vhs pushed a change to branch release-1.0.2
in repository https://gitbox.apache.org/repos/asf/hudi.git
discard 0e05e3a1c76 [HUDI-9267] Fix the file group reader log file read
sequence (#13115)
discard a3dcd03579c [HUDI-9286] Fix the atomicity of archived timeline write
(#13128)
discard b4dec4006f2 [HUDI-9262] Skip building stats for decimal field with
very high precision (#13097)
discard 548ba3d41b0 [MINOR] Overload HoodieFileGroupReader constructor to
bring back old API without allowInflightInstants parameter (#13116)
discard 50fc937eb4b [HUDI-9288] Fixing HoodieFileGroup api related to
uncommitted slices (#13125)
discard 6f96dba86d6 [HUDI-9269] Handle tight bound field in columns stats
metadata (#13117)
discard 80022d029fc [HUDI-9263] Archived timeline downgrade fails with
EightToSevenDowngradeHandler (#13098)
discard 4be388cd427 [HUDI-9258] Disable partial update for CUSTOM merge mode
(#13092)
discard 13064e19249 [HUDI-9156] Add lock provider heartbeat manager (#12958)
discard ba2694bacf1 [HUDI-9155] Add scaffolding for lock provider using
conditional writes (#12954)
discard 44fc1c0e798 [MINOR] Fix logging issue with
EightToSevenDowngradeHandler (#13096)
discard 67827dfe1b1 [MINOR] Update javadocs in MergeIntoHoodieTableCommand
(#13093)
discard 68d7530bd68 [MINOR] Upgrade jetty version to 9.4.57.v20241219 to fix
CVE-2024-8184 (#13058)
discard 02bbef7e442 [HUDI-9247] Flip default value of reuse of TimeGenerator
instance (#13077)
discard 2187205b280 [HUDI-9258] Disable partial update when global index is
used (#13086)
discard b4c3ad69a91 [HUDI-9259] Fixing marker reconciliation for failures
during deleting additional files (#13088)
discard 75bd69c2641 [HUDI-9255] Fix inferring correct merge behavior for few
scenarios (#13079)
discard 46a1c76fe3d [MINOR] Fix NumberFormatException while updating metrics
for MDT in table version 6 (#13056)
discard 9418abb3819 [MINOR] Fixing master for build failure (#13085)
discard 002de745054 [MINOR] Remove warning around table version six (#13080)
discard 859b173101e [HUDI-9252] BaseHoodieWriteClient should only check for
upgrade (#13073)
discard 56bfb8502a5 [HUDI-9206] Support reading inflight instants with
HoodieLogRecordReader (#13010)
discard be7b7316fb2 [MINOR]Fix typo and Add implementation class name in
interface method of HoodieRecordMerger (#13059)
discard fe4aac027c9 [HUDI-8635] Support numWrites metric for compaction
(#13047)
discard ac195c074c5 [HUDI-8409] Fixing merge mode config during upgrade and
downgrade from version 7 to 8 and back (#13046)
discard 1d33f31b4b7 [HUDI-9236] Handle markers for log files in table version
6 (#13007)
discard 6d2c80d2289 [HUDI-9133] Fallback to legacy schema fetching in case of
IllegalAccessError from schema registry client (#13030)
discard 66a6bc7ffff [HUDI-9239] Fix the bug of Spark Cache not releasing
cleanly (#13050)
discard 384a150bd16 [HUDI-9238] Fix bug with HudiIncrSource when source is
table version 6 (#13048)
discard 1e414a76d76 [HUDI-9167] Remove redundant classes in
hudi-utilities-slim-bundle (#12962)
discard 2fbef041444 [HUDI-9233] Handle empty table edge case for col stats
initialization (#13045)
discard 6166ad8728a [HUDI-9227] Fix bulk insert overwrite after a failed
insert overwrite (#13041)
discard 96c1e9a5c66 [MINOR] Use configutils to handle defaults for all zk lock
provider configs (#13039)
discard 8fc35f09008 [HUDI-9088] Fix unnecessary scanning of target table in
MERGE INTO on Spark (#12934)
discard 3722417a51b [MINOR] Follow up HUDI-8803, optimize vectorized reader by
cache 'batchIdxField' (#13023)
discard fe85e957554 [HUDI-9216] Ignore validation of empty databaseName in
HoodieTableMetaClient (#13033)
discard 34ba8984dff [MINOR] Fix flaky test testCreateNewInstantTimes (#13032)
discard ac6e03cfe93 [HUDI-9212] Fix metadata compaction failure post downgrade
(#13022)
discard 6b4a823eedc [HUDI-7037] Fix colstats reading for Decimal field (#12993)
discard f0484f33c57 [HUDI-9220] Cannot find write operation type if run inline
log compaction (#13029)
discard b526ac5bc2c [HUDI-9216] Fallback to spark.catalog.currentDatabase if
tableConfig.databaseName is null or empty (#13026)
discard c3725999f0f [HUDI-9211] Fix bug with config in DataHubSyncTool (#13018)
discard 67f72273f61 [HUDI-9215] Set partitionColumnsWithKeyGenerator based on
table version (#13025)
discard a16bf51c159 [HUDI-9022] Handle records with custom delete markers in
FG reader (#12843)
discard 41438c009ee [HUDI-9207] Spark Insert Overwrite Support Row Writer
(#13014)
discard ea58f229719 [HUDI-9198] Support rate limit for append mode (#12999)
discard 6e73206901a [HUDI-9120] Remove HUDI-9130 code changes relating to FGR
(#12935)
discard 8e5a7b5225b [HUDI-9120] Fix merge mode inference for table version 6
in file group reader (#12991)
discard f5c2cca374a [HUDI-9013] Add backwards compatible MDT writer support
and reader support with tbl v6 (#12948)
discard ee4454e356c [HUDI-9083] Fixing flakiness with multi writer test
(#12987)
discard 9f0438f8ff0 [HUDI-9170] Fixing schema projection with file group
reader (#12970)
discard 1f7c5fca059 [HUDI-9127] Fixing completion time generation to honor
timezone from table config (#12926)
discard ef639e514c3 [HUDI-9175] Remove the unnecessary MDT metadata check for
col_stats index config update (#12977)
discard 5f4058109b8 [HUDI-7375] Enable
testLogReaderWithDifferentVersionsOfDeleteBlocks (#12668)
discard 48b39b6df92 [HUDI-8345] Delete partition stats index for a partition
that is deleted (#12953)
discard 54cbfebf609 [HUDI-9168] Refactor hudi-client-common to not import
hudi-aws (#12969)
discard ea15c41d835 [HUDI-9186] Remove tableState from
HoodieFileGroupReaderBasedParquetFileFormat constructor (#12981)
discard 323076cddce [HUDI-9173] Fix issue with inflight compaction and global
index lookup (#12976)
discard 888776e4d03 [HUDI-9166] Introduce schema pruning for delete-record
(#12961)
discard 7a734e37f47 [HUDI-9166] Static cleanup method that doesn't hold
references to DiskMap instances (#12956)
discard 9def3bfb71e [MINOR] Improve error logs on mdt validation job (#12973)
discard db6f98741e4 Fix hadoop deps from hudi-common (#12965)
discard debd313c9cf [HUDI-7803] Fix bundle validation on Flink 1.18 (#12959)
discard f36a125511a [MINOR] Remove storage instance variable from
HoodieIngestionMetrics (#12937)
discard b423ba5ab2b [HUDI-9152] Improve read/write/compaction performance by
reusing avro schema (#12949)
discard e51d39220e3 [HUDI-9141] Handle the case that dbName is empty in
HoodieFileIndex (#12885)
discard 05a308399e4 [HUDI-9068] Fix cloudwatch metrics (#12873)
discard 9dc7fa75027 [HUDI-9132] Avoid empty string row key for delete and
update operations (#12929)
discard b72659f7dc5 [HUDI-9125] Pass compaction/merge related props to
HoodieBaseFileGroupRecordBuffer (#12925)
discard 7e5ed58811f [HUDI-8768] Support bloom filter options when creating
expr index using bloom filter (#12919)
discard b93c4c0bc30 [MINOR] Fix cleaner config compatibility with older
releases (#12943)
discard 388f0e76880 [MINOR] Skip merging Jacoco execution data file from Azure
Job 1 FT (#12942)
discard d53523e34a6 [HUDI-9086] Re-enable flaky tests and fixing spark context
not shutting down (#12909)
discard bf6e58cf618 [HUDI-9057] Fixing ClassNotFound issue w/
ProtoBufSchemaProvider (#12913)
discard 91ce8b7e945 [MINOR] Add retries and logs to merging Jacoco execution
data files (#12921)
discard c9f228f90f9 [HUDI-9115] Deprecate Hudi CLI script (#12916)
discard 7cc283f8fde [MINOR] Fix generating file id with wrong bucket index
(#12917)
discard a15b932b996 [HUDI-9030] Compatibility fixes for table version 6 with
1.0 (#12888)
discard da8d035774d [HUDI-8219] Add concurrent schema evolution conflict
detection (#12781)
discard d04a9019a19 [HUDI-5387] Add bundle validation for hudi-cli-bundle
(#12882)
discard 963c30f8eae [HUDI-9067] Fixing num spark tasks for clean action
(#12874)
discard 6ad5afc8188 [HUDI-8745] Add tests for record index and secondary index
with insert dups policy (#12914)
discard 4da5f94f4e0 [HUDI-8826] Extend MIT partial update test (#12915)
discard e98b4e1bbcd [HUDI-9092] Deprecate byte array of serialization of
instants (#12900)
discard 87ad953c5ee [HUDI-9079] Log the exception message properly to handle
the 'default' partition value migration steps (#12911)
discard ade674b66b1 [HUDI-8992] Deprecate all byte array usage in metadata
deserialization path (#12826)
discard 23c3440cd4b [HUDI-9093] Fix the HoodieClusteringJob duplicate
parameter alias (#12897)
discard 0587ccd841b [HUDI-9086] Disabling failing test to unblock master
(#12890)
discard c8b2500172e [HUDI-8486] Add column mismatch test coverage (#12800)
discard 9d091248cab [HUDI-8486] Enforce data type match for required columns
in Spark SQL MERGE INTO (#12798)
discard 571217e1115 [HUDI-9065] Adding new filters to ArchivedTimelineV1
(#12869)
discard 5fccbd3973c [HUDI-7985] Add more test cases around timestamp and
decimal formats in Json Avro converter (#11629)
discard 3fecddf6e4e [HUDI-8577] Use config default for query type in default
source in Spark (#12435)
discard f184bce41a5 [HUDI-8940] Fix Bloom Index Partitioner to distribute keys
uniformly across partitions (#12741)
discard fdd6fe2917e [HUDI-8992] Fix serde issues in commit metadata (#12829)
discard 1b06059e9cc [HUDI-9072] Support decimal in JsonKafkaSource (#12879)
discard 05a4b5051cd [HUDI-8954] Reconstruct writer schema while clustering by
spark row writer (#12758)
discard c26fd1bee8f [HUDI-9040] Set the correct table path when renaming
tables (#12848)
discard ebccf2f2df3 [HUDI-9071] MDT validator can configure if log truncation
applies (#12877)
discard cd4e24ad87a [HUDI-9061] JSON to AVRO schema converter (#12864)
discard d07c6b703d4 [hudi-9041] Send commit ack event when reusing current
instant (#12849)
discard 873821724e6 [HUDI-8901] Fix Timeline Server to process requests from
multiple storage lakes (#12696)
discard d2724ed5806 [HUDI-9070] Fix Hudi cli bundle script to use
HUDI_CONF_DIR environment variable (#12876)
discard e9e4417c9ed [MINOR] Add StorageSchemes for Aliyun Apsara File Storage
for HDFS (#12872)
discard 5856027c2d6 [HUDI-8955] Resolve Kafka beginning offsets with retention
to prevent OffsetOutOfRange exception (#12762)
discard f49f102272e [HUDI-8378] Fix Avro schema deserializer failing with
schema evolution (#12111)
discard b2a739a4ebf [HUDI-8501] Improve SizeAwareDataInputStream to implement
idempotent (#12231)
discard a1f74834285 [HUDI-8126] Use union to parallelize data and error table
writes (#12813)
discard 73e12c06b4e [HUDI-8868] Use external spillable map for
cachedAllInputFileSlices in BaseHoodieTableFileIndex (#12647)
discard 2921d87c429 [HUDI-8786] IntelliJ automatic code cleanup tool (#12524)
discard 916527f45a5 [HUDI-8126] Support proto messages for spark kryo
serializer excluding DynamicMessages (#12052)
discard d6634da8c61 [HUDI-9060] Remove validations for clustering metadata
(#12860)
discard 855e0b91835 [HUDI-9059] Fix ordering of getInstantTimes in
CompletionTimeQueryViewV2 (#12858)
discard 77920ab959c [HUDI-8988] Implement retry logic for all HTTP calls to
timeline server including remote file system view and markers (#12804)
discard a3401684080 [MINOR] Increase timeout of hudi-utilities tests in Azure
CI (#12862)
discard 8a6c150f50b [HUDI-9016] Fix the HoodieCompactor schedule parameter
alias name #12836
discard 05800716c4f [HUDI-8442] Reduce timeline loads while rolling back
failed writes (#12164)
discard 79fd3e5de0f [HUDI-9045] Spillable Map operational improvements (#12850)
discard 7a891f0c1f8 [HUDI-8883] Add ability to configure
HoodieCompactionPlanGenerator using reflection (#12664)
discard ee0c41114a0 [HUDI-8126] Persist sourceRdd to optimise writeStatus DAG
for error table (#11844)
discard 055c1cfda6f [HUDI-8882] Support Compaction/Rollback/Clean Timeline
Instant Metrics in HoodieMetrics (#12681)
discard f14f95c538e [HUDI-7624] Fixing source read and index tagging duration
(#12789)
discard b7d498b2153 [HUDI-9039] Run do init table transaction only when
required (#12847)
discard 7d470db3a45 [MINOR] Rebalance Azure CI jobs (2025-02-14) (#12844)
discard 9f4599bc1b2 [HUDI-9038] Enable Codecov tracking and comment on code
coverage (#12845)
discard 6110f9deab7 [HUDI-7222] Fix Scala style check (#12837)
discard 763d71bc14e [HUDI-8962] Adding more logs during indexing (#12787)
discard 2d94433e9ae [HUDI-7596] Enable Jacoco code coverage report across
multiple modules (#12842)
discard 4e1b1f8690e [HUDI-8985] Fix avg record size estimator to avoid parsing
all commit metadata of active timeline (#12803)
discard 16dddb7ba4e [MINOR] Rebalance Azure CI jobs (2025-02-13) (#12840)
discard ebb7bfac7dc [MINOR] Fix skew in clustering operator (#12765)
discard 0e5c4075e1b [HUDI-8972] Fixing heart beats for failed writes in
HoodieStreamer (#12802)
discard fb4046cd44f [MINOR] Register more classes with Kryo (#12268)
discard 750fa4f72fa [HUDI-8701] prevent lock metrics failure from failing
process (#12458)
discard 0783c9fc2c2 [HUDI-9009] Fix potential race condition when listing
files by leveraging exception type #12834
discard 0c223ec6c66 [HUDI-8992] Initial changes to allow us to use streams
instead of byte arrays (#12814)
discard 07ab36f5197 [HUDI-8971] Fixing metadata metrics to honor metrics
prefix (#12797)
discard c0746351f5f [HUDI-8885] Improve the efficiency of initializing the
metadata table and table services (#12665)
discard a09ab5bfe23 [HUDI-5612] Add support for using metadata table with
spillable map based file system views (#12110)
discard cc1d0976546 [HUDI-8798] Validate Metadata Table Enabled during
Archival (#12549)
discard 0216728a2b3 [HUDI-8941] Stop spark context with proper exit code in
HoodieStreamer (#12742)
discard f2a653a7753 [HUDI-8922] Avoid scheduling existing rollbacks (#12715)
discard 38b9d1eeaf8 [HUDI-8998] Improve handling of zero scale decimals in
MercifulJsonConverter (#12822)
discard 4fa61c721f0 [HUDI-8959] HoodieMetadataTableValidator can skip
validation if data table is not found (#12775)
discard 34632cc2d33 [HUDI-8965] Fix Hoodie Hive Sync Tool to Throw Exceptions
on Sync Failures (#12790)
discard a21dcc34827 [HUDI-8967] Add a config to fail job if duplicate data
files detected during reconcileAgainstMarkers (#12792)
discard 9e480538b5a [HUDI-8569] Fix Insert overwrite / update MOR with global
index does not work (#12819)
discard b399bd29a54 [MINOR] Updating doap for release 1.0.1 (#12816)
discard 4ae869e691b [HUDI-8989] Fixing Compaction scheduling for tbl v6
(#12805)
discard 9ec170eb31e [MINOR] Fix jenkins compile issue (#12810)
discard e7de74a6105 [HUDI-8958] Enable record index validation by default
(#12785)
discard 4ba44acb201 [HUDI-8966] Claim RFC-88 for New
Schema/DataType/Expression Abstractions (#12791)
discard 5c50ee152ee [MINOR] Move field ordering lookup outside of loop for
proto conversion (#12271)
discard 4cee31fef1e [MINOR] Fix code issues reported by SonarQube (#12767)
discard e0f5d40cfa8 [HUDI-8951] Disabled flaky
`TestHoodieDeltaStreamer::testHoodieIndexerExecutionAfterClustering` (#12769)
discard 3e48496d958 [HUDI-8278] Add log message about different primary keys
for HoodieTableFactory (#12766)
discard ebd6282b367 [HUDI-8950] Explicitly set base path in sync config #12754
discard ad0197caf99 [HUDI-8767] Add bundle validation for release artifacts in
Maven central (#12488)
discard 3b4132b9066 [HUDI-8953] Update bundle validation workflow on a release
candidate (#12755)
new baddd411511 [HUDI-8953] Update bundle validation workflow on a release
candidate (#12755)
new fee4981690a [HUDI-8767] Add bundle validation for release artifacts in
Maven central (#12488)
new 8ab60c13653 [HUDI-8950] Explicitly set base path in sync config #12754
new dac905d2e48 [HUDI-8278] Add log message about different primary keys
for HoodieTableFactory (#12766)
new f744a09874d [HUDI-8951] Disabled flaky
`TestHoodieDeltaStreamer::testHoodieIndexerExecutionAfterClustering` (#12769)
new 7beccbc391d [MINOR] Fix code issues reported by SonarQube (#12767)
new 2bed90726fb [MINOR] Move field ordering lookup outside of loop for
proto conversion (#12271)
new 0b901e8c7a9 [HUDI-8966] Claim RFC-88 for New
Schema/DataType/Expression Abstractions (#12791)
new e7f32f97af7 [HUDI-8958] Enable record index validation by default
(#12785)
new ca712fd8580 [MINOR] Fix jenkins compile issue (#12810)
new 7d50d68b6bf [HUDI-8989] Fixing Compaction scheduling for tbl v6
(#12805)
new a87542676b7 [MINOR] Updating doap for release 1.0.1 (#12816)
new 47cb2b49d09 [HUDI-8569] Fix Insert overwrite / update MOR with global
index does not work (#12819)
new 7f8988ba233 [HUDI-8967] Add a config to fail job if duplicate data
files detected during reconcileAgainstMarkers (#12792)
new 722f672faa0 [HUDI-8965] Fix Hoodie Hive Sync Tool to Throw Exceptions
on Sync Failures (#12790)
new 79f8f64c0fa [HUDI-8959] HoodieMetadataTableValidator can skip
validation if data table is not found (#12775)
new f306c09bb3a [HUDI-8998] Improve handling of zero scale decimals in
MercifulJsonConverter (#12822)
new 7dd2bff532e [HUDI-8922] Avoid scheduling existing rollbacks (#12715)
new 4e5826d2880 [HUDI-8941] Stop spark context with proper exit code in
HoodieStreamer (#12742)
new c9502125c17 [HUDI-8798] Validate Metadata Table Enabled during
Archival (#12549)
new 600d2a693cf [HUDI-5612] Add support for using metadata table with
spillable map based file system views (#12110)
new 4df692940c6 [HUDI-8885] Improve the efficiency of initializing the
metadata table and table services (#12665)
new 659033464f2 [HUDI-8971] Fixing metadata metrics to honor metrics
prefix (#12797)
new dc4e44348de [HUDI-8992] Initial changes to allow us to use streams
instead of byte arrays (#12814)
new d2ddd5dd59c [HUDI-9009] Fix potential race condition when listing
files by leveraging exception type #12834
new c1e8c32122a [HUDI-8701] prevent lock metrics failure from failing
process (#12458)
new 581215db34f [MINOR] Register more classes with Kryo (#12268)
new bf6e28df090 [HUDI-8972] Fixing heart beats for failed writes in
HoodieStreamer (#12802)
new 9bac97c4473 [MINOR] Fix skew in clustering operator (#12765)
new ba74ea1759e [MINOR] Rebalance Azure CI jobs (2025-02-13) (#12840)
new f2916d9268c [HUDI-8985] Fix avg record size estimator to avoid parsing
all commit metadata of active timeline (#12803)
new 7f1c623e405 [HUDI-7596] Enable Jacoco code coverage report across
multiple modules (#12842)
new 79e38b32daa [HUDI-8962] Adding more logs during indexing (#12787)
new bfd3479b6d2 [HUDI-7222] Fix Scala style check (#12837)
new 9cb4de8f905 [HUDI-9038] Enable Codecov tracking and comment on code
coverage (#12845)
new b5d4a136914 [MINOR] Rebalance Azure CI jobs (2025-02-14) (#12844)
new 6a93716f163 [HUDI-9039] Run do init table transaction only when
required (#12847)
new 43f697fb65c [HUDI-7624] Fixing source read and index tagging duration
(#12789)
new 959cf5aeddf [HUDI-8882] Support Compaction/Rollback/Clean Timeline
Instant Metrics in HoodieMetrics (#12681)
new 9ff2779fdf3 [HUDI-8126] Persist sourceRdd to optimise writeStatus DAG
for error table (#11844)
new 02bbc9765b1 [HUDI-8883] Add ability to configure
HoodieCompactionPlanGenerator using reflection (#12664)
new bf36263216e [HUDI-9045] Spillable Map operational improvements (#12850)
new f3207f21f6e [HUDI-8442] Reduce timeline loads while rolling back
failed writes (#12164)
new bf104cd8802 [HUDI-9016] Fix the HoodieCompactor schedule parameter
alias name #12836
new 7c91b70022d [MINOR] Increase timeout of hudi-utilities tests in Azure
CI (#12862)
new ffa4ca6f713 [HUDI-8988] Implement retry logic for all HTTP calls to
timeline server including remote file system view and markers (#12804)
new 172d8e5513b [HUDI-9059] Fix ordering of getInstantTimes in
CompletionTimeQueryViewV2 (#12858)
new 9fc9c2951c1 [HUDI-9060] Remove validations for clustering metadata
(#12860)
new 56c91297563 [HUDI-8126] Support proto messages for spark kryo
serializer excluding DynamicMessages (#12052)
new a747d313254 [HUDI-8786] IntelliJ automatic code cleanup tool (#12524)
new 06c5319e54e [HUDI-8868] Use external spillable map for
cachedAllInputFileSlices in BaseHoodieTableFileIndex (#12647)
new ab0f9d29115 [HUDI-8126] Use union to parallelize data and error table
writes (#12813)
new ce4de61b8e8 [HUDI-8501] Improve SizeAwareDataInputStream to implement
idempotent (#12231)
new b5f73fae854 [HUDI-8378] Fix Avro schema deserializer failing with
schema evolution (#12111)
new 035b0e930d8 [HUDI-8955] Resolve Kafka beginning offsets with retention
to prevent OffsetOutOfRange exception (#12762)
new 7a5482452a7 [MINOR] Add StorageSchemes for Aliyun Apsara File Storage
for HDFS (#12872)
new 01759b3818c [HUDI-9070] Fix Hudi cli bundle script to use
HUDI_CONF_DIR environment variable (#12876)
new 18869ca2d3e [HUDI-8901] Fix Timeline Server to process requests from
multiple storage lakes (#12696)
new 52150105d48 [HUDI-8920] [RFC-84] Optimize SerDe of stream records for
Flink write (excluding bulk insert and append mode) (#12796)
new 7afbb23cb0c [hudi-9041] Send commit ack event when reusing current
instant (#12849)
new 3be117885f8 [HUDI-9061] JSON to AVRO schema converter (#12864)
new 6debd1bb219 [HUDI-9071] MDT validator can configure if log truncation
applies (#12877)
new 0d61608d2de [HUDI-9040] Set the correct table path when renaming
tables (#12848)
new c9a8ba60459 [HUDI-8954] Reconstruct writer schema while clustering by
spark row writer (#12758)
new 2db3b9305c5 [HUDI-9072] Support decimal in JsonKafkaSource (#12879)
new e0ec9574f5f [HUDI-8992] Fix serde issues in commit metadata (#12829)
new ca7513fea7b [HUDI-8940] Fix Bloom Index Partitioner to distribute keys
uniformly across partitions (#12741)
new 7bbeda35229 [HUDI-8577] Use config default for query type in default
source in Spark (#12435)
new 9b0ed22e926 [HUDI-7985] Add more test cases around timestamp and
decimal formats in Json Avro converter (#11629)
new 46385f98cb2 [HUDI-9065] Adding new filters to ArchivedTimelineV1
(#12869)
new b08ba1e30e5 [HUDI-8486] Enforce data type match for required columns
in Spark SQL MERGE INTO (#12798)
new d1d553910bd [HUDI-8486] Add column mismatch test coverage (#12800)
new dbf56d01a01 [HUDI-9086] Disabling failing test to unblock master
(#12890)
new 8c8f4164468 [HUDI-9093] Fix the HoodieClusteringJob duplicate
parameter alias (#12897)
new 100174b1214 [HUDI-8992] Deprecate all byte array usage in metadata
deserialization path (#12826)
new cba6f734160 [HUDI-9079] Log the exception message properly to handle
the 'default' partition value migration steps (#12911)
new f8083c16f91 [HUDI-9092] Deprecate byte array of serialization of
instants (#12900)
new 88005536df3 [HUDI-8826] Extend MIT partial update test (#12915)
new c8756b1d28b [HUDI-8745] Add tests for record index and secondary index
with insert dups policy (#12914)
new eab943a6cac [HUDI-9067] Fixing num spark tasks for clean action
(#12874)
new c43fc1943ba [HUDI-5387] Add bundle validation for hudi-cli-bundle
(#12882)
new d0f13b78b11 [HUDI-8219] Add concurrent schema evolution conflict
detection (#12781)
new 4b95d7b7b30 [HUDI-9030] Compatibility fixes for table version 6 with
1.0 (#12888)
new 23c63de66da [MINOR] Fix generating file id with wrong bucket index
(#12917)
new b88ab08c47a [HUDI-9115] Deprecate Hudi CLI script (#12916)
new bf1304ccf9b [MINOR] Add retries and logs to merging Jacoco execution
data files (#12921)
new 2ed8da1ca9a [HUDI-9057] Fixing ClassNotFound issue w/
ProtoBufSchemaProvider (#12913)
new 2dcc318dd04 [HUDI-9086] Re-enable flaky tests and fixing spark context
not shutting down (#12909)
new 60af73b0cad [MINOR] Skip merging Jacoco execution data file from Azure
Job 1 FT (#12942)
new 43911fa8df3 [MINOR] Fix cleaner config compatibility with older
releases (#12943)
new d7f8da82c33 [HUDI-8768] Support bloom filter options when creating
expr index using bloom filter (#12919)
new d8e604e9ce3 [HUDI-9125] Pass compaction/merge related props to
HoodieBaseFileGroupRecordBuffer (#12925)
new 3fcbe4b6fcb [HUDI-9132] Avoid empty string row key for delete and
update operations (#12929)
new 0ed25ee283b [HUDI-9068] Fix cloudwatch metrics (#12873)
new 343d04fe3b4 [HUDI-9141] Handle the case that dbName is empty in
HoodieFileIndex (#12885)
new c9dcffabb24 [HUDI-9152] Improve read/write/compaction performance by
reusing avro schema (#12949)
new e6fcc76f658 [MINOR] Remove storage instance variable from
HoodieIngestionMetrics (#12937)
new 7cc3806d790 [HUDI-7803] Fix bundle validation on Flink 1.18 (#12959)
new f830696dc46 Fix hadoop deps from hudi-common (#12965)
new 37bb1ddb917 [MINOR] Improve error logs on mdt validation job (#12973)
new bab3d663c96 [HUDI-9166] Static cleanup method that doesn't hold
references to DiskMap instances (#12956)
new 2f7990653e7 [HUDI-9166] Introduce schema pruning for delete-record
(#12961)
new d6cf97881cd [HUDI-9173] Fix issue with inflight compaction and global
index lookup (#12976)
new 6741d3628fa [HUDI-9186] Remove tableState from
HoodieFileGroupReaderBasedParquetFileFormat constructor (#12981)
new a03f726cdac [HUDI-9168] Refactor hudi-client-common to not import
hudi-aws (#12969)
new 6186a4c1eac [HUDI-8345] Delete partition stats index for a partition
that is deleted (#12953)
new c9cb1f5ba1e [HUDI-7375] Enable
testLogReaderWithDifferentVersionsOfDeleteBlocks (#12668)
new 31be9b1dcbe [HUDI-9175] Remove the unnecessary MDT metadata check for
col_stats index config update (#12977)
new 7d12836e4e6 [HUDI-9127] Fixing completion time generation to honor
timezone from table config (#12926)
new 6e8bd45d9af [HUDI-9170] Fixing schema projection with file group
reader (#12970)
new 3b0e90da4ff [HUDI-9083] Fixing flakiness with multi writer test
(#12987)
new aff16c487a1 [HUDI-9013] Add backwards compatible MDT writer support
and reader support with tbl v6 (#12948)
new 2b6b281f993 [HUDI-9120] Fix merge mode inference for table version 6
in file group reader (#12991)
new af37637093a [HUDI-9120] Remove HUDI-9130 code changes relating to FGR
(#12935)
new 7d6c0b1a5b5 [HUDI-9198] Support rate limit for append mode (#12999)
new 9ceeaf3cdcf [HUDI-9207] Spark Insert Overwrite Support Row Writer
(#13014)
new 910abe77694 [HUDI-9022] Handle records with custom delete markers in
FG reader (#12843)
new 6d63916feda [HUDI-9215] Set partitionColumnsWithKeyGenerator based on
table version (#13025)
new cd69e5d40bd [HUDI-9211] Fix bug with config in DataHubSyncTool (#13018)
new 1c041b1c0a1 [HUDI-9216] Fallback to spark.catalog.currentDatabase if
tableConfig.databaseName is null or empty (#13026)
new 93aafd81713 [HUDI-9220] Cannot find write operation type if run inline
log compaction (#13029)
new 7e315e05f61 [HUDI-7037] Fix colstats reading for Decimal field (#12993)
new 4c8360ae4bc [HUDI-9212] Fix metadata compaction failure post downgrade
(#13022)
new 1374e7f9e0d [MINOR] Fix flaky test testCreateNewInstantTimes (#13032)
new 857e98ea044 [HUDI-9216] Ignore validation of empty databaseName in
HoodieTableMetaClient (#13033)
new 7ddeba57cd2 [MINOR] Follow up HUDI-8803, optimize vectorized reader by
cache 'batchIdxField' (#13023)
new 0b3aab2e08a [HUDI-9088] Fix unnecessary scanning of target table in
MERGE INTO on Spark (#12934)
new 4d4a9867a1b [MINOR] Use configutils to handle defaults for all zk lock
provider configs (#13039)
new 8795bb726f2 [HUDI-9227] Fix bulk insert overwrite after a failed
insert overwrite (#13041)
new 4530c1f3e28 [HUDI-9233] Handle empty table edge case for col stats
initialization (#13045)
new a0f98ffb7d2 [HUDI-9167] Remove redundant classes in
hudi-utilities-slim-bundle (#12962)
new 01050086a37 [HUDI-9238] Fix bug with HudiIncrSource when source is
table version 6 (#13048)
new f0860cf009f [HUDI-9239] Fix the bug of Spark Cache not releasing
cleanly (#13050)
new 6a4cbc6bfe2 [HUDI-9133] Fallback to legacy schema fetching in case of
IllegalAccessError from schema registry client (#13030)
new 6ce55788692 [HUDI-9231] Show complete DAG for one single query in
spark web ui while inserting into hudi table (#13044)
new 64799bcecc6 [HUDI-9236] Handle markers for log files in table version
6 (#13007)
new 5c64e3d8eb3 [HUDI-8409] Fixing merge mode config during upgrade and
downgrade from version 7 to 8 and back (#13046)
new 0d36693b8b1 [HUDI-8635] Support numWrites metric for compaction
(#13047)
new 1ec91e5cd94 [MINOR]Fix typo and Add implementation class name in
interface method of HoodieRecordMerger (#13059)
new ca2e29e3e94 [HUDI-9206] Support reading inflight instants with
HoodieLogRecordReader (#13010)
new 76989ad9eb0 [HUDI-9252] BaseHoodieWriteClient should only check for
upgrade (#13073)
new 688b9bdd13e [MINOR] Remove warning around table version six (#13080)
new 59092d9c2d9 [MINOR] Fixing master for build failure (#13085)
new b3fd2144cbc [MINOR] Fix NumberFormatException while updating metrics
for MDT in table version 6 (#13056)
new 5dad9b046a7 [HUDI-9255] Fix inferring correct merge behavior for few
scenarios (#13079)
new e9789cb5e43 [HUDI-9259] Fixing marker reconciliation for failures
during deleting additional files (#13088)
new 4c1c30f60e0 [HUDI-9258] Disable partial update when global index is
used (#13086)
new 5378b3e92de [HUDI-9247] Flip default value of reuse of TimeGenerator
instance (#13077)
new e18e8b7d7d9 [MINOR] Upgrade jetty version to 9.4.57.v20241219 to fix
CVE-2024-8184 (#13058)
new e238fa50f6d [MINOR] Update javadocs in MergeIntoHoodieTableCommand
(#13093)
new 36df87759aa [MINOR] Fix logging issue with
EightToSevenDowngradeHandler (#13096)
new f7778afe759 [HUDI-9155] Add scaffolding for lock provider using
conditional writes (#12954)
new 4a431a0cc2f [HUDI-9156] Add lock provider heartbeat manager (#12958)
new 05cdae005f2 [HUDI-9258] Disable partial update for CUSTOM merge mode
(#13092)
new b6c1008d342 [HUDI-9263] Archived timeline downgrade fails with
EightToSevenDowngradeHandler (#13098)
new 2b01e97d2cc [HUDI-9269] Handle tight bound field in columns stats
metadata (#13117)
new e88b086db6b [HUDI-9288] Fixing HoodieFileGroup api related to
uncommitted slices (#13125)
new 78b6e2aac42 [MINOR] Overload HoodieFileGroupReader constructor to
bring back old API without allowInflightInstants parameter (#13116)
new 002add43d3b [HUDI-9270] Support displaying complete dag for update
statement in spark web ui (#13110)
new a270e2f7682 [HUDI-9262] Skip building stats for decimal field with
very high precision (#13097)
new d08b90c3444 [HUDI-9286] Fix the atomicity of archived timeline write
(#13128)
new 7d6cb3fcb32 [HUDI-9267] Fix the file group reader log file read
sequence (#13115)
new 5009f47166b [HUDI-9158] Add storage-based lock provider abstract
implementation (#13103)
new 163a7753911 [HUDI-5092] Support Databricks Spark runtime (#13129)
new d4d23e0b274 [HUDI-9308] Fix incorrect usage of
mapreduce.input.fileinputformat.split.maxsize in HoodieCombineHiveInputFormat
(#13134)
new 44293e2bade [HUDI-9309] Fix perf regression from supporting display
dag of insert/update statment in spark ui (#13139)
new b49507ca584 [HUDI-9311] Revert HUDI-7146 which causes perf overhead
for merging MDT log files (#13136)
new 3b56e2a26a0 [HUDI-9159] S3 implementation of StorageLock for
StorageBasedLockProvider (#13126)
new 386be6775b3 [HUDI-9312] Fix the default enable/disable inference for
indices for column_stats, RLI, partition_stats and bloom_filter (#13140)
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user force-pushes a change (git push --force) and generates a repository
containing something like this:
* -- * -- B -- O -- O -- O (0e05e3a1c76)
           \
            N -- N -- N refs/heads/release-1.0.2 (386be6775b3)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 169 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
hudi-aws/pom.xml | 24 +
.../aws/transaction/lock/S3StorageLockClient.java | 270 ++++++++++
.../lock/TestS3StorageBasedLockProvider.java | 159 ++++++
.../transaction/lock/TestS3StorageLockClient.java | 338 ++++++++++++
.../transaction/lock/StorageBasedLockProvider.java | 529 +++++++++++++++++++
.../{StorageLock.java => StorageLockClient.java} | 33 +-
.../lock/models/LockProviderHeartbeatManager.java | 2 +-
...LockUpdateResult.java => LockUpsertResult.java} | 4 +-
.../transaction/lock/models/StorageLockFile.java | 12 +-
.../org/apache/hudi/config/HoodieWriteConfig.java | 60 +--
.../apache/hudi/config/StorageBasedLockConfig.java | 100 ++++
.../index/bucket/ConsistentBucketIdentifier.java | 8 +-
.../lock/StorageBasedLockProviderTestBase.java | 290 ++++++++++
.../lock/TestStorageBasedLockProvider.java | 587 +++++++++++++++++++++
...ileTest.java => StorageLockClientFileTest.java} | 6 +-
.../hudi/config/TestStorageBasedLockConfig.java | 101 ++++
.../hudi/client/model/HoodieFlinkInternalRow.java | 135 +++++
.../model/HoodieFlinkInternalRowSerializer.java | 170 ++++++
.../model/HoodieFlinkInternalRowTypeInfo.java | 104 ++++
.../hudi/index/SparkMetadataTableRecordIndex.java | 9 +-
.../hudi/client/functional/TestHoodieIndex.java | 48 +-
.../org/apache/hudi/BaseHoodieTableFileIndex.java | 17 +
.../hudi/common/config/HoodieMetadataConfig.java | 58 +-
.../hudi/common/table/HoodieTableMetaClient.java | 2 +
.../table/log/block/HoodieHFileDataBlock.java | 11 -
.../hudi/common/table/view/NoOpTableMetadata.java | 9 +-
.../apache/hudi/metadata/BaseTableMetadata.java | 20 +-
.../metadata/FileSystemBackedTableMetadata.java | 9 +-
.../hudi/metadata/HoodieBackedTableMetadata.java | 149 ------
.../metadata/HoodieMetadataLogRecordReader.java | 23 +-
.../apache/hudi/metadata/HoodieTableMetadata.java | 16 +-
.../hudi/metadata/MetadataPartitionType.java | 43 +-
.../hudi/common/util/TestReflectionUtils.java | 15 +
.../hudi/metadata/TestMetadataPartitionType.java | 48 +-
.../org/apache/hudi/sink/StreamWriteFunction.java | 103 +++-
.../org/apache/hudi/sink/StreamWriteOperator.java | 14 +-
.../hudi/sink/bootstrap/BootstrapOperator.java | 37 +-
.../bootstrap/batch/BatchBootstrapOperator.java | 13 +-
.../sink/bucket/BucketStreamWriteFunction.java | 45 +-
.../sink/bucket/BucketStreamWriteOperator.java | 16 +-
.../bucket/ConsistentBucketAssignFunction.java | 23 +-
.../ConsistentBucketStreamWriteFunction.java | 14 +-
.../sink/partitioner/BucketAssignFunction.java | 81 +--
.../sink/transform/RowDataToHoodieFunction.java | 87 +--
.../RowDataToHoodieFunctionWithRateLimit.java | 4 +-
.../sink/transform/RowDataToHoodieFunctions.java | 7 +-
.../apache/hudi/sink/utils/PayloadCreation.java | 9 -
.../java/org/apache/hudi/sink/utils/Pipelines.java | 64 ++-
.../apache/hudi/streamer/HoodieFlinkStreamer.java | 6 +-
.../org/apache/hudi/table/HoodieTableSink.java | 10 +-
.../apache/hudi/sink/ITTestDataStreamWrite.java | 94 ++--
.../hudi/sink/bucket/ITTestBucketStreamWrite.java | 58 +-
.../bucket/ITTestConsistentBucketStreamWrite.java | 6 +-
.../utils/BucketStreamWriteFunctionWrapper.java | 16 +-
...ConsistentBucketStreamWriteFunctionWrapper.java | 15 +-
.../sink/utils/StreamWriteFunctionWrapper.java | 25 +-
.../org/apache/hudi/common/util/HFileUtils.java | 45 +-
.../hudi/io/hadoop/HoodieAvroHFileWriter.java | 5 +-
.../hudi/io/hadoop/HoodieHBaseAvroHFileReader.java | 83 +--
.../apache/hudi/common/fs/TestStorageSchemes.java | 9 +
.../hadoop/TestHoodieHBaseHFileReaderWriter.java | 62 ---
.../io/hadoop/TestHoodieHFileReaderWriterBase.java | 2 +-
.../hudi/hadoop/HiveHoodieTableFileIndex.java | 18 -
.../hadoop/hive/HoodieCombineHiveInputFormat.java | 11 +-
.../hive/TestHoodieCombineHiveInputFormat.java | 41 +-
.../apache/hudi/common/util/ReflectionUtils.java | 16 +
.../org/apache/hudi/storage/StorageSchemes.java | 94 ++--
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 10 +-
.../org/apache/hudi/RecordLevelIndexSupport.scala | 6 +-
.../org/apache/hudi/SecondaryIndexSupport.scala | 6 +-
.../apache/hudi/SparkHoodieTableFileIndex.scala | 37 +-
.../spark/sql/hudi/analysis/HoodieAnalysis.scala | 23 +-
.../command/CreateHoodieTableAsSelectCommand.scala | 12 +-
.../command/InsertIntoHoodieTableCommand.scala | 23 +-
.../hudi/command/UpdateHoodieTableCommand.scala | 88 +--
.../functional/TestHoodieBackedMetadata.java | 13 +-
.../hudi/functional/RecordLevelIndexTestBase.scala | 5 +-
.../hudi/functional/TestColumnStatsIndex.scala | 4 +-
.../hudi/functional/TestMetadataRecordIndex.scala | 5 +-
.../hudi/functional/TestPartitionStatsIndex.scala | 4 +-
.../functional/TestPartitionStatsPruning.scala | 4 +-
.../apache/hudi/utilities/TestHoodieIndexer.java | 4 +-
pom.xml | 15 +-
83 files changed, 3705 insertions(+), 1096 deletions(-)
create mode 100644
hudi-aws/src/main/java/org/apache/hudi/aws/transaction/lock/S3StorageLockClient.java
create mode 100644
hudi-aws/src/test/java/org/apache/hudi/aws/transaction/lock/TestS3StorageBasedLockProvider.java
create mode 100644
hudi-aws/src/test/java/org/apache/hudi/aws/transaction/lock/TestS3StorageLockClient.java
create mode 100644
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/StorageBasedLockProvider.java
rename
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/{StorageLock.java
=> StorageLockClient.java} (62%)
rename
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/models/{LockUpdateResult.java
=> LockUpsertResult.java} (95%)
create mode 100644
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/StorageBasedLockConfig.java
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/lock/StorageBasedLockProviderTestBase.java
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/lock/TestStorageBasedLockProvider.java
rename
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/lock/models/{StorageLockFileTest.java
=> StorageLockClientFileTest.java} (97%)
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestStorageBasedLockConfig.java
create mode 100644
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/model/HoodieFlinkInternalRow.java
create mode 100644
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/model/HoodieFlinkInternalRowSerializer.java
create mode 100644
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/model/HoodieFlinkInternalRowTypeInfo.java