This is an automated email from the ASF dual-hosted git repository.
codope pushed a change to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git
from 0908f648152 [HUDI-6999] Adding row writer support to HoodieStreamer
(#9913)
add 72d9d3dcb59 [HUDI-7001] ComplexAvroKeyGenerator should represent
single record key as the value string without composing the key field name
(#9936)
add d4a09b28116 [MINOR] Remove rocksdb version from m1 profile (#10006)
add b72aa87f9b9 [HUDI-7010] Build clustering group reduces redundant
traversals (#9957)
add a817da87965 [HUDI-7039] PartialUpdateAvroPayload preCombine failed
need show details (#10000)
add 9a8b8b7830b [HUDI-7048] Fix checkpoint loss issue when changing MOR to
COW in streamer (#10001)
add fd81c2cc3b9 [HUDI-7033] Fix read error for schema evolution +
partition value extraction (#9994)
add 1793435a43b [MINOR] Fix tests that set precombine to nonexistent field
(#10008)
add 13ed45bc2a5 [HUDI-7030] Update containsInstant without
containsOrBeforeTimelineStarts to fix data lost (#9982)
add dc265a5511f ShowPartitionsCommand should consider lazy
delete_partitions (#10019)
add 7973a67dc06 [HUDI-7017] Prevent full schema evolution from wrongly
falling back to OOB schema evolution (#9966)
add b295af310bf [HUDI-6872] Simplify Out Of Box Schema Evolution
Functionality (#9743)
add 805bca003aa [HUDI-7054][FOLLOW_UP] HoodieCatalogTable should ignore
lazily deleted partitions (#10024)
add 55985106615 [MINOR] Add logs in PartitionAwareClusteringPlanStrategy
(#10051)
add 77692b44a4b [HUDI-7085] Update release scripts (#10072)
add dbc51a894b8 [HUDI-7035] Fix CDC Incremental Read When First Write
Contains Delete And Upsert (#10071)
add 900cfb3fd27 [MINOR] Fix npe for get internal schema (#9984)
add ae8eca41076 [Minor] Throws an exception when using bulk_insert and
stream mode (#10082)
add 162f1800f38 [HUDI-7094]
AlterTableAddColumnCommand/AlterTableChangeColumnCommand update table with
ro/rt suffix (#10094)
add ef83ee5208c [MINOR] Add detailed error logs in RunCompactionProcedure
(#10070)
add b8edbd091a0 [HUDI-5936] Fix serialization problem when FileStatus is
not serializable (#10065)
add 69e0a6895b9 [Minor] Throw exceptions when cleaner/compactor fail
(#10108)
add e640feb8131 [MINOR] Modified description to include missing trigger
strategy (#10114)
add 9361e4505b0 [MINOR] Removing unnecessary guards to row writer (#10004)
add a9cd902bb53 [HUDI-7109] Fix Flink may re-use a committed instant in
append mode (#10119)
add 0f5fb62a2fb [HUDI-7116] Add docker image for flink 1.14 and spark
2.4.8 (#10126)
add 640ed7d4d4b [HUDI-7119] Don't write precombine field to
hoodie.properties when the ts field does not exist for append mode (#10133)
add 7796ed8aa84 [HUDI-7098] Add max bytes per partition with cloud stores
source in DS (#10100)
add c54b40ea48e Fix schema refresh for KafkaAvroSchemaDeserializer (#10118)
add b8ea19ad046 [HUDI-7111] Fix performance regression of tag when written
into simple bucket index table (#10130)
add eabe86af2b3 [HUDI-7118] Set conf
'spark.sql.parquet.enableVectorizedReader' to true automatically only if the
value is not explicitly set (#10134)
add 5e18a583c9a [HUDI-7107] Reused MetricsReporter fails to publish
metrics in Spark streaming job (#10132)
add 27930041f3a [HUDI-7127] Fixing set up and tear down in tests (#10146)
add 6b91cfbc130 [MINOR] Misc fixes in deltastreamer (#10067)
add 3be3283e7c3 [HUDI-7083] Adding support for multiple tables with
Prometheus Reporter (#10068)
add 8b86dd00de9 [HUDI-7003] Add option to fallback to full table scan if
files are deleted due to cleaner (#9941)
add 82cb7fef27e [HUDI-7106] Fix sqs deletes, deltasync service close and
error table default configs. (#10117)
add 301f8d81aa6 [HUDI-7084] Fixing schema retrieval for table w/ no
commits (#10069)
add d1f39b9132d [HUDI-7115] Add in new options for the bigquery sync
(#10125)
add 87a426abe02 [HUDI-7096] Improving incremental query to fetch
partitions based on commit metadata (#10098)
add fa9c5a14914 [HUDI-7129] Fix bug when upgrade from table version three
using UpgradeOrDowngradeProcedure (#10147)
add f45006cb794 [HUDI-6961] Fixing DefaultHoodieRecordPayload to honor
deletion based on meta field as well as custom delete marker (#10150)
add a925b8cfc6a [HUDI-7004] Add support of snapshotLoadQuerySplitter in
s3/gcs sources (#10152)
add 91f6165b852 [MINOR] Remove unused import (#10159)
add 5c0a150770b [HUDI-7052] Fix partition key validation for custom key
generators. (#10014)
add 62d9268e59f Removing unused imports
add 6e8a2aff7fc [HUDI-7112] Reuse existing timeline server and performance
improvements (#10122)
add 74ef03d6c1a [MINOR] Making misc fixes to deltastreamer sources(S3 and
GCS) (#10095)
add 02c8097d0a4 [HUDI-7120] Performance improvements in deltastreamer
executor code path (#10135)
add d78a2f3b4f6 [HUDI-7034] Fix refresh table/view (#10151)
add 4765f3edead [HUDI-7086] Scaling gcs event source (#10073)
add 39613621ac7 [HUDI-7095] Making perf enhancements to JSON serde (#10097)
add e90616c5f01 Fixing build failures
add 9250a624dd1 [HUDI-7006] Reduce unnecessary is_empty rdd calls in
StreamSync (#10158)
add c9a39d7b87e [HUDI-7139] Fix operation type for bulk insert with row
writer in Hudi Streamer (#10175)
add a26d564455c [HUDI-7041] Optimize the memory usage of timeline server
for table service (#10002)
add bce8f8d3cc8 Fixing build failures
add 1951d805c34 [HUDI-7097] Fix instantiation of Hms Uri with HiveSync
tool (#10099)
add 461e14bb4b2 [MINOR] Schema Converter should use default identity
transform if not specified (#10178)
add 28facfe8cdb [HUDI-7147] Fix CDC write flush bug (#10186)
add a6d29e2fd31 [HUDI-7148] Add an additional fix to the potential thread
insecurity problem of heartbeat client (#10188)
add 2a0f18b73c9 [minor] when metric prefix length is 0 ignore the metric
prefix (#10190)
add dbeda41f15d [HUDI-7086] Fix the default for gcp pub sub max sync time
to 1min (#10171)
add 7d8ce155ad5 [HUDI-7138] Fix error table writer and schema registry
provider (#10173)
add 3c894596a90 Fixing build failures
add 3e9e3dfd102 [HUDI-7128] DeleteMarkerProcedures support delete in batch
mode (#10148)
add 8f5bdc79c68 [HUDI-7128][FOLLOW-UP] support metadatadelete with batch
mode (#10210)
add 1338e2998d5 [HUDI-7161] Add commit action type and extra metadata to
write callback on commit message (#10213)
add a9ac4a84bfe [HUDI-7160] Copy over schema properties when adding Hudi
Metadata fields (#10212)
add 3f0cf232ffb Fixing failing test
add bd86803c558 [HUDI-7165] Flink multi writer not close the failed
instant heartbeat (#10221)
add ee8b3ca15b1 [HUDI-7153] Fixing range overflow with kafka source and
spark partition management (#10205)
add c6c3bd3d35c [HUDI-6217] Spark reads should skip record with delete
operation metadata (#10219)
add d5e36cef87d [HUDI-7071] Throw exceptions when clustering/index job
fail (#10050)
add a96a21d9589 [HUDI-7154] Fix NPE from empty batch with row writer
enabled in Hudi Streamer (#10198)
add a3bc5f141ca [HUDI-6822] Fix deletes handling in hbase index when
partition path is updated (#9630)
add 3921f0f5a96 Fixing compilation issues
add 21fdee50b88 [HUDI-7165][FOLLOW-UP] Add test case for stopping
heartbeat for un-committed events (#10230)
add 1f6b45d6a48 [HUDI-7100] Fixing insert overwrite operations with drop
dups config (#10222)
add 1a0757b9691 [HUDI-6980] Fixing closing of write client on failure
scenarios (#10224)
add 574d9561fdf [MINOR] Fixing view manager reuse with Embedded timeline
server (#10240)
add a5b7b26cf75 [MINOR] Allow concurrent modification for heartbeat map
(#10215)
add b4debe5d82a [MINOR] Fixing integ test writer for commit time
generation (#10243)
add 00d6025996b [MINOR] Fixing streamer props in integ tests (#10260)
add 68f37119ad1 [HUDI-7199] Optimize contains impl with
HoodieDefaultTimeline (#10284)
add 7cef60af873 [HUDI-7189] Fix Flink catalog keygen class of table
properties for non partitioned table (#10227)
add 8d9017d647b [HUDI-7173] Fix hudi-on-flink read issues involving schema
evolution and decimal types (#10247)
add 50497f24965 Fixing decimal fix for flink 1.13.x
add a881f62cca2 [HUDI-7169] Comparison between defaultParName and
partValue (#10234)
add 8749d6d31af [HUDI-7136] In the dfs catalog scenario, solve the problem
of Primary key definition is missing (#10162)
add e0aa7a1b2ec [HUDI-7185] Fix call show_fsview_all failure error due to
not specify partition path (#10257)
add ed3ecf36bdc [HUDI-7191] Create table should shutdown with exception
when occur catalog sync error (#10269)
add f801bbb967e [HUDI-7135] Spark reads hudi table error when flink
creates the table without precombine key (#10157)
add 511a6c5bbea [HUDI-7196] Call register metric before rollback
compaction (#10268)
add 4c64f498e71 [MINOR] Relaxing required props with defaults (#10259)
add 1056241607e [HUDI-6954] Fixing unpartitioned datasets for col stats
and bloom filter partition in MDT (#10251)
add 61c135f22c9 [HUDI-7159] Check the table type between hoodie.properties
and table options (#10209)
add 4c12e5eeca1 [HUDI-6012] Delete base path when failed to run bootstrap
procedure (#8349)
add f17618a57e0 [HUDI-6094] make utilities kafka send call from async to
sync (#8489)
add 4dc6a1e5bc0 [HUDI-7206] Fixing auto deletion of mdt (#10292)
add 1dfeda49c78 [HUDI-7201] Schema Evolution: use target schema if source
is empty (#10288)
add 75d06238e3d [HUDI-7171] Fix 'show partitions' not display rewritten
partitions (#10242)
add 790903712ec [HUDI-7040] Handle dropping of partition columns in
BulkInsertDataInternalWriterHelper::write(...) (#10272)
add 080d2f9f08f [HUDI-7210] In CleanFunction#open, triggers the cleaning
under option 'clean.async.enabled' (#10298)
add 549a80bf865 [HUDI-7132] Data may be lost for flink task failure
(#10312)
add bd59a866ea8 [MINOR] NPE fix while adding projection field & added its
test cases (#10313)
add e4fd81f1b25 [HUDI-7183] Fix static insert overwrite partitions issue
(#10254)
add d1a43dc3694 [HUDI-7223] Cleaner KEEP_LATEST_BY_HOURS should retain
latest commit before earliest commit to retain (#10307)
add 283f18b3032 Bumping release candidate number 1 for 0.14.1
add 9a9f13dccf5 Fixing log reader eager closure
add dff42eb468c Add cachedSchema per batch, fix idempotency with
getSourceSchema calls
add 6b13f98dbee [HUDI-7236] Fix mit when changing partition paths with
global index
add e53f184aa97 Fix scala typedprops conversion for schema evol
add a25116ec53d Fixing compilation issues
add 3531b730392 Fixing MIT and global index tests
add 73914cebbda Fixing failing test: Test Call
repair_overwrite_hoodie_props Procedure
add d651b17cd84 [MINOR] Add StorageSchemes for Aliyun Apsara File Storage
for HDFS (#10391)
add 52309055f0c Revert "Add cachedSchema per batch, fix idempotency with
getSourceSchema calls"
add 548b10c7d70 Fix dynamodb http endpoint
add 420ad9026cf Fix missing datadog configuration metrics on mdt
add 66cff7d7642 Bumping release candidate number 2
add 5b0d67bc798 [MINOR] Update release version to reflect published
version 0.14.1
No new revisions were added by this update.
Summary of changes:
...essive-clean-archival-inline-compact.properties | 14 +-
docker/hoodie/hadoop/base/pom.xml | 2 +-
docker/hoodie/hadoop/base_java11/pom.xml | 2 +-
docker/hoodie/hadoop/datanode/pom.xml | 2 +-
docker/hoodie/hadoop/historyserver/pom.xml | 2 +-
docker/hoodie/hadoop/hive_base/pom.xml | 2 +-
docker/hoodie/hadoop/namenode/pom.xml | 2 +-
docker/hoodie/hadoop/pom.xml | 2 +-
docker/hoodie/hadoop/prestobase/pom.xml | 2 +-
docker/hoodie/hadoop/spark_base/pom.xml | 2 +-
docker/hoodie/hadoop/sparkadhoc/pom.xml | 2 +-
docker/hoodie/hadoop/sparkmaster/pom.xml | 2 +-
docker/hoodie/hadoop/sparkworker/pom.xml | 2 +-
docker/hoodie/hadoop/trinobase/pom.xml | 2 +-
docker/hoodie/hadoop/trinocoordinator/pom.xml | 2 +-
docker/hoodie/hadoop/trinoworker/pom.xml | 2 +-
hudi-aws/pom.xml | 4 +-
.../lock/DynamoDBBasedLockProvider.java | 2 +-
hudi-cli/pom.xml | 2 +-
hudi-client/hudi-client-common/pom.xml | 4 +-
.../common/HoodieWriteCommitCallbackMessage.java | 36 +-
.../org/apache/hudi/client/BaseHoodieClient.java | 2 +-
.../hudi/client/BaseHoodieTableServiceClient.java | 4 +
.../apache/hudi/client/BaseHoodieWriteClient.java | 3 +-
.../embedded/EmbeddedTimelineServerHelper.java | 38 +-
.../client/embedded/EmbeddedTimelineService.java | 169 +++++-
.../client/heartbeat/HoodieHeartbeatClient.java | 9 +-
.../lock/ZookeeperBasedLockProvider.java | 2 -
.../org/apache/hudi/config/HoodieWriteConfig.java | 8 +-
.../org/apache/hudi/index/HoodieIndexUtils.java | 108 +++-
.../hudi/index/bucket/HoodieBucketIndex.java | 35 --
.../index/bucket/HoodieConsistentBucketIndex.java | 29 +-
.../hudi/index/bucket/HoodieSimpleBucketIndex.java | 54 +-
.../java/org/apache/hudi/io/HoodieCDCLogger.java | 23 +-
.../hudi/io/HoodieKeyLocationFetchHandle.java | 4 +-
.../org/apache/hudi/io/HoodieMergedReadHandle.java | 5 +-
.../AutoRecordGenWrapperAvroKeyGenerator.java | 27 +-
.../AutoRecordKeyGeneratorWrapper.java} | 19 +-
.../hudi/keygen/ComplexAvroKeyGenerator.java | 3 +
.../metadata/HoodieBackedTableMetadataWriter.java | 79 +--
.../hudi/metadata/HoodieMetadataWriteUtils.java | 17 +
.../org/apache/hudi/metrics/HoodieMetrics.java | 10 +-
.../main/java/org/apache/hudi/metrics/Metrics.java | 2 +
.../metrics/prometheus/PrometheusReporter.java | 77 ++-
.../java/org/apache/hudi/table/HoodieTable.java | 10 +-
.../action/clean/CleanPlanActionExecutor.java | 30 +-
.../hudi/table/action/clean/CleanPlanner.java | 35 +-
.../cluster/strategy/ClusteringPlanStrategy.java | 2 +-
.../PartitionAwareClusteringPlanStrategy.java | 6 +
.../BaseHoodieCompactionPlanGenerator.java | 2 +-
.../marker/TimelineServerBasedWriteMarkers.java | 13 +-
.../table/upgrade/ThreeToFourUpgradeHandler.java | 6 +
.../embedded/TestEmbeddedTimelineService.java | 189 +++++++
.../hudi/keygen/TestComplexAvroKeyGenerator.java | 88 +++
.../apache/hudi/table/action/TestCleanPlanner.java | 336 ++++++++++++
hudi-client/hudi-flink-client/pom.xml | 4 +-
hudi-client/hudi-java-client/pom.xml | 4 +-
.../client/TestHoodieJavaWriteClientInsert.java | 6 +-
.../hudi/client/TestJavaHoodieBackedMetadata.java | 2 +-
hudi-client/hudi-spark-client/pom.xml | 4 +-
.../hudi/execution/SparkLazyInsertIterable.java | 3 +-
.../index/bloom/SparkHoodieBloomIndexHelper.java | 3 +-
.../hudi/index/hbase/SparkHoodieHBaseIndex.java | 4 +
.../keygen/AutoRecordGenWrapperKeyGenerator.java | 48 +-
.../apache/hudi/keygen/BuiltinKeyGenerator.java | 6 +-
.../commit/BulkInsertDataInternalWriterHelper.java | 34 +-
.../SparkInsertOverwriteCommitActionExecutor.java | 17 +-
.../org/apache/hudi/AvroConversionUtils.scala | 55 +-
.../org/apache/hudi/HoodieConversionUtils.scala | 4 +-
.../hudi/HoodieDatasetBulkInsertHelper.scala | 31 +-
.../org/apache/hudi/util/SparkKeyGenUtils.scala | 31 +-
.../hudi/client/TestHoodieClientMultiWriter.java | 35 +-
.../hudi/client/TestSparkRDDWriteClient.java | 6 +-
.../functional/TestHoodieBackedMetadata.java | 23 +-
.../TestHoodieClientOnMergeOnReadStorage.java | 10 +-
.../hudi/client/functional/TestHoodieIndex.java | 21 +
.../TestRemoteFileSystemViewWithMetadataTable.java | 42 +-
.../hudi/common/fs/NonSerializableFileSystem.java | 115 ++++
.../fs/TestHoodieSerializableFileStatus.java | 86 +++
.../index/hbase/TestSparkHoodieHBaseIndex.java | 95 ++--
...TestSparkBuildClusteringGroupsForPartition.java | 30 ++
.../table/functional/TestCleanPlanExecutor.java | 25 +-
hudi-client/pom.xml | 2 +-
hudi-common/pom.xml | 12 +-
.../org/apache/hudi/BaseHoodieTableFileIndex.java | 32 +-
.../apache/hudi/avro/AvroSchemaCompatibility.java | 109 ++--
.../java/org/apache/hudi/avro/AvroSchemaUtils.java | 55 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 186 +++++--
.../hudi/common/config/HoodieCommonConfig.java | 10 +
.../apache/hudi/common/config/TypedProperties.java | 5 +
.../java/org/apache/hudi/common/fs/FSUtils.java | 23 +-
.../common/fs/HoodieSerializableFileStatus.java | 144 +++++
.../org/apache/hudi/common/fs/StorageSchemes.java | 4 +-
.../apache/hudi/common/model/CleanFileInfo.java | 18 +
.../common/model/DefaultHoodieRecordPayload.java | 29 +-
.../org/apache/hudi/common/model/HoodieRecord.java | 23 +-
.../hudi/common/model/HoodieRecordDelegate.java | 32 +-
.../common/model/PartialUpdateAvroPayload.java | 5 +
.../hudi/common/model/WriteOperationType.java | 7 +
.../hudi/common/table/TableSchemaResolver.java | 64 ++-
.../hudi/common/table/cdc/HoodieCDCExtractor.java | 3 +-
.../hudi/common/table/log/HoodieLogFileReader.java | 50 +-
.../common/table/log/HoodieLogFormatReader.java | 29 +-
.../table/log/block/HoodieAvroDataBlock.java | 25 +-
.../common/table/log/block/HoodieCDCDataBlock.java | 5 +-
.../common/table/log/block/HoodieCommandBlock.java | 5 +-
.../common/table/log/block/HoodieCorruptBlock.java | 5 +-
.../common/table/log/block/HoodieDataBlock.java | 5 +-
.../common/table/log/block/HoodieDeleteBlock.java | 5 +-
.../table/log/block/HoodieHFileDataBlock.java | 5 +-
.../common/table/log/block/HoodieLogBlock.java | 11 +-
.../table/log/block/HoodieParquetDataBlock.java | 5 +-
.../table/timeline/HoodieActiveTimeline.java | 4 +-
.../table/timeline/HoodieDefaultTimeline.java | 57 +-
.../hudi/common/table/timeline/TimelineUtils.java | 64 ++-
.../hudi/common/table/timeline/dto/DTOUtils.java | 4 +-
.../table/view/AbstractTableFileSystemView.java | 133 ++++-
.../table/view/HoodieTableFileSystemView.java | 5 +
.../table/view/PriorityBasedFileSystemView.java | 10 +
.../view/RemoteHoodieTableFileSystemView.java | 97 ++--
.../table/view/RocksDbBasedFileSystemView.java | 6 +
.../common/table/view/TableFileSystemView.java | 25 +
.../hudi/common/util/InternalSchemaCache.java | 6 +-
.../hudi/common/util/RocksDBSchemaHelper.java | 4 +
.../org/apache/hudi/common/util/StringUtils.java | 10 +
.../convert/AvroInternalSchemaConverter.java | 21 +
.../schema/utils/AvroSchemaEvolutionUtils.java | 87 ++-
.../internal/schema/utils/SchemaChangeUtils.java | 14 +-
.../apache/hudi/metadata/BaseTableMetadata.java | 4 +-
.../metadata/FileSystemBackedTableMetadata.java | 28 +-
.../hudi/metadata/HoodieMetadataPayload.java | 17 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 38 +-
.../org/apache/hudi/avro/TestHoodieAvroUtils.java | 25 +
.../model/TestDefaultHoodieRecordPayload.java | 9 +-
.../hudi/common/testutils/HoodieTestUtils.java | 10 +-
.../apache/hudi/common/util/TestStringUtils.java | 7 +
.../schema/utils/TestAvroSchemaEvolutionUtils.java | 15 +
hudi-common/src/test/resources/nullRight.avsc | 213 ++++++++
hudi-common/src/test/resources/nullWrong.avsc | 203 +++++++
.../src/test/resources}/source_evolved.avsc | 6 +-
hudi-examples/hudi-examples-common/pom.xml | 2 +-
hudi-examples/hudi-examples-flink/pom.xml | 2 +-
hudi-examples/hudi-examples-java/pom.xml | 2 +-
hudi-examples/hudi-examples-spark/pom.xml | 2 +-
hudi-examples/pom.xml | 2 +-
hudi-flink-datasource/hudi-flink/pom.xml | 4 +-
.../apache/hudi/configuration/FlinkOptions.java | 4 +-
.../java/org/apache/hudi/sink/CleanFunction.java | 18 +-
.../hudi/sink/StreamWriteOperatorCoordinator.java | 11 +-
.../hudi/sink/append/AppendWriteFunction.java | 2 +-
.../hudi/sink/compact/CompactionPlanOperator.java | 2 +-
.../apache/hudi/source/prune/PartitionPruners.java | 2 +-
.../org/apache/hudi/table/HoodieTableFactory.java | 32 +-
.../org/apache/hudi/table/HoodieTableSink.java | 5 +
.../apache/hudi/table/catalog/HoodieCatalog.java | 40 +-
.../hudi/table/catalog/HoodieHiveCatalog.java | 11 +
.../java/org/apache/hudi/util/CompactionUtil.java | 2 +-
.../java/org/apache/hudi/util/StreamerUtil.java | 21 +
.../sink/TestStreamWriteOperatorCoordinator.java | 67 +++
.../org/apache/hudi/sink/TestWriteCopyOnWrite.java | 97 ++--
.../org/apache/hudi/sink/utils/TestWriteBase.java | 1 +
.../apache/hudi/table/ITTestSchemaEvolution.java | 100 ++--
.../apache/hudi/table/TestHoodieTableFactory.java | 28 +-
.../hudi/table/catalog/TestHoodieCatalog.java | 27 +
.../hudi/table/catalog/TestHoodieHiveCatalog.java | 53 +-
.../org/apache/hudi/utils/TestConfigurations.java | 4 +-
hudi-flink-datasource/hudi-flink1.13.x/pom.xml | 4 +-
hudi-flink-datasource/hudi-flink1.14.x/pom.xml | 4 +-
.../table/format/cow/ParquetSplitReaderUtil.java | 28 +-
.../table/format/cow/vector/HeapDecimalVector.java | 37 +-
hudi-flink-datasource/hudi-flink1.15.x/pom.xml | 4 +-
.../table/format/cow/ParquetSplitReaderUtil.java | 28 +-
.../table/format/cow/vector/HeapDecimalVector.java | 37 +-
hudi-flink-datasource/hudi-flink1.16.x/pom.xml | 4 +-
.../table/format/cow/ParquetSplitReaderUtil.java | 28 +-
.../table/format/cow/vector/HeapDecimalVector.java | 37 +-
hudi-flink-datasource/hudi-flink1.17.x/pom.xml | 4 +-
.../table/format/cow/ParquetSplitReaderUtil.java | 28 +-
.../table/format/cow/vector/HeapDecimalVector.java | 37 +-
hudi-flink-datasource/pom.xml | 4 +-
hudi-gcp/pom.xml | 5 +-
.../hudi/gcp/bigquery/BigQuerySyncConfig.java | 20 +
.../apache/hudi/gcp/bigquery/BigQuerySyncTool.java | 23 +-
.../gcp/bigquery/HoodieBigQuerySyncClient.java | 58 +-
.../hudi/gcp/bigquery/TestBigQuerySyncConfig.java | 2 +-
.../hudi/gcp/bigquery/TestBigQuerySyncTool.java | 12 +-
.../gcp/bigquery/TestBigQuerySyncToolArgs.java | 8 +-
.../gcp/bigquery/TestHoodieBigQuerySyncClient.java | 26 +-
hudi-hadoop-mr/pom.xml | 2 +-
.../hudi/hadoop/HiveHoodieTableFileIndex.java | 4 +-
.../utils/HoodieRealtimeInputFormatUtils.java | 2 +-
.../utils/TestHoodieRealtimeInputFormatUtils.java | 49 ++
hudi-integ-test/pom.xml | 2 +-
.../testsuite/HoodieDeltaStreamerWrapper.java | 8 +-
.../integ/testsuite/HoodieTestSuiteWriter.java | 4 +-
hudi-kafka-connect/pom.xml | 4 +-
.../hudi-metaserver/hudi-metaserver-client/pom.xml | 2 +-
.../hudi-metaserver/hudi-metaserver-server/pom.xml | 2 +-
hudi-platform-service/hudi-metaserver/pom.xml | 4 +-
hudi-platform-service/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark-common/pom.xml | 4 +-
.../BaseDatasetBulkInsertCommitActionExecutor.java | 3 +-
...setBulkInsertOverwriteCommitActionExecutor.java | 18 +-
...eamerDatasetBulkInsertCommitActionExecutor.java | 10 +-
.../org/apache/hudi/BaseFileOnlyRelation.scala | 8 -
.../scala/org/apache/hudi/DataSourceOptions.scala | 5 +-
.../scala/org/apache/hudi/HoodieBaseRelation.scala | 19 +-
.../apache/hudi/HoodieBootstrapMORRelation.scala | 6 -
.../org/apache/hudi/HoodieDataSourceHelper.scala | 61 ++-
.../scala/org/apache/hudi/HoodieFileIndex.scala | 4 +-
.../scala/org/apache/hudi/HoodieSchemaUtils.scala | 237 ++++++++
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 205 ++++---
.../scala/org/apache/hudi/HoodieWriterUtils.scala | 5 +-
.../org/apache/hudi/IncrementalRelation.scala | 4 +-
.../src/main/scala/org/apache/hudi/Iterators.scala | 66 ++-
.../hudi/MergeOnReadIncrementalRelation.scala | 4 +-
.../apache/hudi/SparkHoodieTableFileIndex.scala | 8 +-
.../sql/catalyst/catalog/HoodieCatalogTable.scala | 17 +-
.../parquet/HoodieParquetFileFormatHelper.scala | 21 +-
.../spark/sql/hudi/ProvidesHoodieConfig.scala | 83 +--
.../AlterHoodieTableAddColumnsCommand.scala | 58 +-
.../AlterHoodieTableChangeColumnCommand.scala | 14 +-
.../hudi/command/CreateHoodieTableCommand.scala | 3 +-
.../command/CreateHoodieTableLikeCommand.scala | 3 +-
.../command/ShowHoodieTablePartitionsCommand.scala | 10 +-
hudi-spark-datasource/hudi-spark/pom.xml | 4 +-
.../apache/hudi/cli/BootstrapExecutorUtils.java | 9 +-
.../command/InsertIntoHoodieTableCommand.scala | 32 +-
.../hudi/command/MergeIntoHoodieTableCommand.scala | 4 +-
.../command/procedures/DeleteMarkerProcedure.scala | 11 +-
.../procedures/DeleteMetadataTableProcedure.scala | 22 +-
.../procedures/DeleteSavepointProcedure.scala | 37 +-
.../procedures/RunCompactionProcedure.scala | 13 +-
.../procedures/ShowFileSystemViewProcedure.scala | 11 +-
.../procedures/UpgradeOrDowngradeProcedure.scala | 15 +-
.../apache/hudi/TestDataSourceReadWithDeletes.java | 182 +++++++
.../org/apache/hudi/functional/TestBootstrap.java | 7 +-
.../TestHoodieDatasetBulkInsertHelper.java | 21 +-
.../hudi/keygen/TestComplexKeyGenerator.java | 2 +-
.../org/apache/hudi/TestAvroConversionUtils.scala | 260 ++++-----
.../hudi/TestAvroSchemaResolutionSupport.scala | 2 +-
.../org/apache/hudi/TestDataSourceDefaults.scala | 2 +-
.../apache/hudi/TestHoodieDataSourceHelper.scala | 54 ++
.../org/apache/hudi/TestHoodieFileIndex.scala | 72 ++-
.../org/apache/hudi/TestHoodieParquetBloom.scala | 54 +-
.../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 50 +-
.../org/apache/hudi/TestHoodieSparkUtils.scala | 29 +-
.../model/TestHoodieRecordSerialization.scala | 12 +-
.../hudi/functional/TestBasicSchemaEvolution.scala | 9 +-
.../apache/hudi/functional/TestCOWDataSource.scala | 83 ++-
.../functional/TestColumnStatsIndexWithSQL.scala | 2 +-
.../TestIncrementalReadWithFullTableScan.scala | 2 +-
.../TestMetadataTableWithSparkDataSource.scala | 118 +++-
.../hudi/functional/TestMetricsReporter.scala | 98 ++++
.../functional/cdc/TestCDCDataFrameSuite.scala | 94 ++++
.../apache/spark/sql/hudi/TestCreateTable.scala | 14 +-
.../apache/spark/sql/hudi/TestInsertTable.scala | 98 ++++
.../apache/spark/sql/hudi/TestMergeIntoTable.scala | 206 +++++++
.../apache/spark/sql/hudi/TestShowPartitions.scala | 139 +++++
.../org/apache/spark/sql/hudi/TestSpark3DDL.scala | 41 ++
.../sql/hudi/procedure/TestCallProcedure.scala | 44 ++
.../sql/hudi/procedure/TestFsViewProcedure.scala | 93 ++++
.../sql/hudi/procedure/TestMetadataProcedure.scala | 58 ++
.../hudi/procedure/TestSavepointsProcedure.scala | 71 +++
.../TestUpgradeOrDowngradeProcedure.scala | 27 +
hudi-spark-datasource/hudi-spark2-common/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark2/pom.xml | 4 +-
hudi-spark-datasource/hudi-spark3-common/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark3.0.x/pom.xml | 4 +-
hudi-spark-datasource/hudi-spark3.1.x/pom.xml | 4 +-
hudi-spark-datasource/hudi-spark3.2.x/pom.xml | 4 +-
.../hudi-spark3.2plus-common/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark3.3.x/pom.xml | 4 +-
hudi-spark-datasource/hudi-spark3.4.x/pom.xml | 4 +-
hudi-spark-datasource/pom.xml | 2 +-
hudi-sync/hudi-adb-sync/pom.xml | 2 +-
hudi-sync/hudi-datahub-sync/pom.xml | 2 +-
hudi-sync/hudi-hive-sync/pom.xml | 2 +-
.../java/org/apache/hudi/hive/HiveSyncTool.java | 21 +-
hudi-sync/hudi-sync-common/pom.xml | 2 +-
hudi-sync/pom.xml | 2 +-
hudi-tests-common/pom.xml | 2 +-
hudi-timeline-service/pom.xml | 2 +-
.../hudi/timeline/service/RequestHandler.java | 19 +-
.../hudi/timeline/service/TimelineService.java | 8 +-
.../timeline/service/handlers/BaseFileHandler.java | 11 +-
.../service/handlers/FileSliceHandler.java | 17 +-
.../timeline/service/handlers/MarkerHandler.java | 4 +-
.../service/handlers/marker/MarkerDirState.java | 3 +-
hudi-utilities/pom.xml | 2 +-
.../org/apache/hudi/utilities/HoodieCleaner.java | 13 +-
.../apache/hudi/utilities/HoodieClusteringJob.java | 13 +-
.../org/apache/hudi/utilities/HoodieCompactor.java | 17 +-
.../org/apache/hudi/utilities/HoodieIndexer.java | 9 +-
.../org/apache/hudi/utilities/UtilHelpers.java | 6 +-
.../kafka/HoodieWriteCommitKafkaCallback.java | 2 +-
.../hudi/utilities/config/CloudSourceConfig.java | 38 +-
.../hudi/utilities/schema/LazyCastingIterator.java | 27 +-
.../utilities/schema/SchemaRegistryProvider.java | 13 +-
.../hudi/utilities/sources/AvroKafkaSource.java | 29 +-
.../sources/GcsEventsHoodieIncrSource.java | 7 +-
.../hudi/utilities/sources/GcsEventsSource.java | 7 +-
.../hudi/utilities/sources/HoodieIncrSource.java | 15 +-
.../sources/S3EventsHoodieIncrSource.java | 19 +-
.../sources/SnapshotLoadQuerySplitter.java | 9 +
.../sources/helpers/CloudObjectsSelector.java | 12 +-
.../helpers/CloudObjectsSelectorCommon.java | 11 +-
.../sources/helpers/CloudStoreIngestionConfig.java | 7 -
.../utilities/sources/helpers/KafkaOffsetGen.java | 8 +-
.../utilities/sources/helpers/QueryRunner.java | 41 +-
.../sources/helpers/gcs/PubsubMessagesFetcher.java | 103 ++--
.../sources/helpers/gcs/PubsubQueueClient.java | 80 +++
.../utilities/streamer/BaseErrorTableWriter.java | 4 +-
.../hudi/utilities/streamer/ErrorTableUtils.java | 2 +-
.../hudi/utilities/streamer/HoodieStreamer.java | 2 +
.../utilities/streamer/HoodieStreamerUtils.java | 100 ++--
.../utilities/streamer/SourceFormatAdapter.java | 5 +-
.../utilities/streamer/SparkSampleWritesUtils.java | 44 +-
.../apache/hudi/utilities/streamer/StreamSync.java | 248 ++++-----
.../deltastreamer/HoodieDeltaStreamerTestBase.java | 27 +
.../deltastreamer/TestHoodieDeltaStreamer.java | 148 ++++-
...TestHoodieDeltaStreamerSchemaEvolutionBase.java | 296 ++++++++++
...oodieDeltaStreamerSchemaEvolutionExtensive.java | 500 +++++++++++++++++
...estHoodieDeltaStreamerSchemaEvolutionQuick.java | 596 +++++++++++++++++++++
.../deltastreamer/TestSparkSampleWritesUtils.java | 4 +-
.../utilities/deltastreamer/TestTransformer.java | 1 +
.../utilities/schema/TestLazyCastingIterator.java | 196 +++++++
.../schema/TestSchemaRegistryProvider.java | 14 +-
.../sources/TestGcsEventsHoodieIncrSource.java | 85 ++-
.../sources/TestS3EventsHoodieIncrSource.java | 78 ++-
.../helpers/gcs/TestPubsubMessagesFetcher.java | 110 ++++
.../data/schema-evolution/endTestEverything.json | 2 +
.../data/schema-evolution/endTypePromotion.json | 2 +
.../schema-evolution/endTypePromotionDropCols.json | 2 +
.../data/schema-evolution/extraLogFiles.json | 6 +
.../extraLogFilesTestEverything.json | 7 +
.../schema-evolution/extraLogFilesTypePromo.json | 7 +
.../data/schema-evolution/newFileGroups.json | 3 +
.../newFileGroupsTestEverything.json | 3 +
.../schema-evolution/newFileGroupsTypePromo.json | 3 +
.../resources/data/schema-evolution/plain.json | 2 +
.../resources/data/schema-evolution/start.json | 6 +
.../data/schema-evolution/startTestEverything.json | 7 +
.../data/schema-evolution/startTypePromotion.json | 7 +
.../data/schema-evolution/testAddAndDropCols.json | 2 +
.../testAddColChangeOrderAllFiles.json | 3 +
.../testAddColChangeOrderSomeFiles.json | 2 +
.../data/schema-evolution/testAddColRoot.json | 2 +
.../data/schema-evolution/testAddColStruct.json | 2 +
.../data/schema-evolution/testAddComplexField.json | 2 +
.../data/schema-evolution/testAddMetaCol.json | 2 +
.../data/schema-evolution/testDropColRoot.json | 2 +
.../data/schema-evolution/testDropColStruct.json | 2 +
.../resources/streamer-config/source_evolved.avsc | 6 +-
.../base/build_flink1146hive239spark248.sh | 26 +
packaging/hudi-aws-bundle/pom.xml | 2 +-
packaging/hudi-cli-bundle/pom.xml | 2 +-
packaging/hudi-datahub-sync-bundle/pom.xml | 2 +-
packaging/hudi-flink-bundle/pom.xml | 2 +-
packaging/hudi-gcp-bundle/pom.xml | 2 +-
packaging/hudi-hadoop-mr-bundle/pom.xml | 2 +-
packaging/hudi-hive-sync-bundle/pom.xml | 2 +-
packaging/hudi-integ-test-bundle/pom.xml | 2 +-
packaging/hudi-kafka-connect-bundle/pom.xml | 2 +-
packaging/hudi-metaserver-server-bundle/pom.xml | 2 +-
packaging/hudi-presto-bundle/pom.xml | 2 +-
packaging/hudi-spark-bundle/pom.xml | 2 +-
packaging/hudi-timeline-server-bundle/pom.xml | 2 +-
packaging/hudi-trino-bundle/pom.xml | 2 +-
packaging/hudi-utilities-bundle/pom.xml | 2 +-
packaging/hudi-utilities-slim-bundle/pom.xml | 2 +-
pom.xml | 26 +-
scripts/release/validate_staged_bundles.sh | 15 +-
scripts/release/validate_staged_release.sh | 2 +-
374 files changed, 9414 insertions(+), 2203 deletions(-)
copy
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/{index/bucket/BucketIndexLocationMapper.java
=> keygen/AutoRecordKeyGeneratorWrapper.java} (67%)
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/embedded/TestEmbeddedTimelineService.java
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/TestComplexAvroKeyGenerator.java
create mode 100644
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java
create mode 100644
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/NonSerializableFileSystem.java
create mode 100644
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/TestHoodieSerializableFileStatus.java
create mode 100644
hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieSerializableFileStatus.java
create mode 100644 hudi-common/src/test/resources/nullRight.avsc
create mode 100644 hudi-common/src/test/resources/nullWrong.avsc
copy {hudi-utilities/src/test/resources/streamer-config =>
hudi-common/src/test/resources}/source_evolved.avsc (97%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/CleanFileInfo.java
=>
hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java
(50%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/CleanFileInfo.java
=>
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java
(50%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/CleanFileInfo.java
=>
hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java
(50%)
copy hudi-common/src/main/java/org/apache/hudi/common/model/CleanFileInfo.java
=>
hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java
(50%)
create mode 100644
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHoodieRealtimeInputFormatUtils.java
create mode 100644
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceReadWithDeletes.java
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieDataSourceHelper.scala
create mode 100644
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetricsReporter.scala
rename
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIndexLocationMapper.java
=>
hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/LazyCastingIterator.java
(53%)
create mode 100644
hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/gcs/PubsubQueueClient.java
create mode 100644
hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionBase.java
create mode 100644
hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionExtensive.java
create mode 100644
hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java
create mode 100644
hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestLazyCastingIterator.java
create mode 100644
hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/gcs/TestPubsubMessagesFetcher.java
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/endTestEverything.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/endTypePromotion.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/endTypePromotionDropCols.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/extraLogFiles.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/extraLogFilesTestEverything.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/extraLogFilesTypePromo.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/newFileGroups.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/newFileGroupsTestEverything.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/newFileGroupsTypePromo.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/plain.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/start.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/startTestEverything.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/startTypePromotion.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddAndDropCols.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddColChangeOrderAllFiles.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddColChangeOrderSomeFiles.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddColRoot.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddColStruct.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddComplexField.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testAddMetaCol.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testDropColRoot.json
create mode 100644
hudi-utilities/src/test/resources/data/schema-evolution/testDropColStruct.json
create mode 100755
packaging/bundle-validation/base/build_flink1146hive239spark248.sh