Coreixwumo opened a new issue #3999:
URL: https://github.com/apache/hudi/issues/3999
Flink on Hudi with a MOR (Merge-On-Read) table
Steps to reproduce the behavior:
1. Create a Kafka table
2. Create a Hudi MOR table with SQL
> create table hudi.ods_user_behavior_logic_mor(
> uuid_did string,
> content string,
> client_ip string,
> userid bigint,
> visit_time_ts bigint,
> `event_time` TIMESTAMP(3) COMMENT 'ETL创建时间',
> `etl_update_time` TIMESTAMP(3) COMMENT 'ETL更新时间',
> `visit_date` string
> )PARTITIONED BY (`visit_date`)
> with (
> 'connector' = 'hudi'
> ,'is_generic' = 'true'
> ,'path' = 'obs://xxx'
> ,'hoodie.datasource.write.recordkey.field' = 'uuid_did'
> ,'hoodie.datasource.write.partitionpath.field' = 'visit_date'
> ,'write.precombine.field' = 'etl_update_time'
> ,'write.tasks' = '1'
> ,'table.type' = 'MERGE_ON_READ'
> ,'index.global.enabled' = 'false'
> ,'compaction.delta_commits' = '5'
> ,'compaction.async.enabled' = 'true'
> ,'compaction.trigger.strategy' = 'num_commits'
> ,'compaction.delta_seconds' = '90'
> ,'compaction.tasks' = '1'
> ,'hive_sync.enable' = 'true'
> ,'hive_sync.db' = 'hudi'
> ,'hive_sync.table' = 'ods_user_behavior_mor'
> ,'hive_sync.username' = 'data'
> ,'hive_sync.file_format' = 'PARQUET'
> ,'hive_sync.support_timestamp' = 'true'
> ,'hive_sync.use_jdbc' = 'true'
> ,'hive_sync.jdbc_url' = 'jdbc:hive2://xxx:10000'
> ,'hive_sync.metastore.uris' = 'thrift://xxx:7004'
> ,'hoodie.datasource.hive_sync.partition_extractor_class' =
'hudi.DatePartitionExtractor'
> ,'hoodie.datasource.hive_style_partition' = 'true'
> ,'hive_sync.partition_fields' = 'visit_date'
> ,'hive_sync.auto_create_database' = 'true'
> ,'hive_sync.skip_ro_suffix' = 'false'
> ,'hive_sync.support_timestamp' = 'false'
> ,'read.tasks' = '1'
> ,'read.streaming.enabled' = 'true'
> ,'hoodie.datasource.query.type' = 'snapshot'
> ,'read.streaming.check-interval' = '30'
> ,'hoodie.datasource.merge.type' = 'payload_combine'
> ,'read.utc-timezone' = 'false'
> );
3. Select from the Kafka table, then insert into the Hudi table
> insert into hudi.ods_user_behavior_logic_mor
> select CONCAT_WS('_', uuid_did, date_format(eventTime,
'yyyy-MM-dd')),content,client_ip,userid,visit_time_ts,eventTime,
> now() etl_update_time ,date_format(eventTime, 'yyyy-MM-dd')
> from hudi.flink_user_behavior_mor
I get the following stack traces:
type 1
> ERROR org.apache.hudi.sink.compact.CompactionCommitSink [] -
Error while committing compaction instant: 20211111151653
> java.lang.IllegalArgumentException: null
> at
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:31)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionState(HoodieActiveTimeline.java:459)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionState(HoodieActiveTimeline.java:440)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionCompactionInflightToComplete(HoodieActiveTimeline.java:311)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.table.action.compact.AbstractCompactHelpers.completeInflightCompaction(AbstractCompactHelpers.java:50)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.client.HoodieFlinkWriteClient.completeCompaction(HoodieFlinkWriteClient.java:367)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.client.HoodieFlinkWriteClient.commitCompaction(HoodieFlinkWriteClient.java:352)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.sink.compact.CompactionCommitSink.doCommit(CompactionCommitSink.java:123)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
type 2
> ERROR org.apache.hudi.sink.compact.CompactFunction [] -
Executor executes action [Execute compaction for instant 20211111151653 from
task 0] error
> org.apache.hudi.exception.HoodieUpsertException: Failed to initialize
HoodieUpdateHandle for FileId: f8bd7fbd-a1bf-4b2f-bfa8-a5adc3457789 on commit
20211111151653 on path obs://xxx/ods_user_behavior_mor
> at
org.apache.hudi.io.HoodieMergeHandle.init(HoodieMergeHandle.java:192)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:136)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.getUpdateHandle(HoodieFlinkCopyOnWriteTable.java:365)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdate(HoodieFlinkCopyOnWriteTable.java:336)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.table.action.compact.HoodieFlinkMergeOnReadTableCompactor.compact(HoodieFlinkMergeOnReadTableCompactor.java:146)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.table.action.compact.FlinkCompactHelpers.compact(FlinkCompactHelpers.java:95)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.sink.compact.CompactFunction.doCompaction(CompactFunction.java:101)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.sink.compact.CompactFunction.lambda$processElement$0(CompactFunction.java:91)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_191]
> at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_191]
> at java.lang.Thread.run(Thread.java:748) [?:1.8.0_191]
> Caused by: org.apache.hadoop.fs.FileAlreadyExistsException:
obs://xxx.parquet already exists
> at
org.apache.hadoop.fs.obs.OBSFileSystem.create(OBSFileSystem.java:873)
~[hadoop-huaweicloud-2.8.3-hw-37.jar:?]
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:929)
~[flink-shaded-hadoop-2-2.8.3-10.0.jar:2.8.3-10.0]
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:910)
~[flink-shaded-hadoop-2-2.8.3-10.0.jar:2.8.3-10.0]
> at
org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$create$12(HoodieWrapperFileSystem.java:300)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:100)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.common.fs.HoodieWrapperFileSystem.create(HoodieWrapperFileSystem.java:298)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:295)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:283)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:227)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.io.storage.HoodieParquetWriter.<init>(HoodieParquetWriter.java:56)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
type 3
> ERROR org.apache.hudi.timeline.service.RequestHandler [] -
Got runtime exception servicing request
basepath=obs://xxx/ods_user_behavior&lastinstantts=20211025212215&timelinehash=b5e2d2477ff5ba3e459697463d4402d1246d1df6f13c31e0509bfcbed21cbd6f
> java.lang.IllegalArgumentException: Last known instant from client was
20211025212215 but server has the following timeline
[[20211025190555__clean__COMPLETED], [20211025191055__clean__COMPLETED],
[20211025191555__clean__COMPLETED], [20211025192055__clean__COMPLETED],
[20211025192555__clean__COMPLETED], [20211025193055__clean__COMPLETED],
[20211025193555__clean__COMPLETED], [20211025194055__clean__COMPLETED],
[20211025194555__clean__COMPLETED], [20211025195055__clean__COMPLETED],
[20211025195555__clean__COMPLETED], [20211025200055__clean__COMPLETED],
[20211025200555__clean__COMPLETED], [20211025201055__clean__COMPLETED],
[20211025201555__clean__COMPLETED], [20211025202055__clean__COMPLETED],
[20211025202555__clean__COMPLETED], [20211025203055__clean__COMPLETED],
[20211025203634__clean__COMPLETED], [==>20211025203641__compaction__REQUESTED],
[20211025203642__deltacommit__COMPLETED],
[20211025203757__deltacommit__COMPLETED],
[20211025203948__deltacommit__COMPLETED], [2021102520401
8__deltacommit__COMPLETED], [20211025204111__deltacommit__COMPLETED],
[20211025204157__deltacommit__COMPLETED], [20211025204234__clean__COMPLETED],
[20211025204255__deltacommit__COMPLETED],
[20211025204408__deltacommit__COMPLETED],
[20211025204454__deltacommit__COMPLETED],
[20211025204543__deltacommit__COMPLETED],
[20211025204644__deltacommit__COMPLETED],
[20211025204741__deltacommit__COMPLETED],
[20211025204841__rollback__COMPLETED],
[20211025204843__deltacommit__COMPLETED],
[20211025204941__rollback__COMPLETED],
[20211025204950__deltacommit__COMPLETED],
[20211025205041__deltacommit__COMPLETED],
[20211025205041__rollback__COMPLETED], [20211025205142__rollback__COMPLETED],
[20211025205312__deltacommit__COMPLETED],
[20211025205313__rollback__COMPLETED], [20211025205351__rollback__COMPLETED],
[20211025205352__deltacommit__COMPLETED], [20211025205445__commit__COMPLETED],
[20211025205445__rollback__COMPLETED],
[20211025205446__deltacommit__COMPLETED], [20211025205545__commit__COMPLETED]
, [20211025205546__deltacommit__COMPLETED],
[20211025205546__rollback__COMPLETED],
[20211025205646__deltacommit__COMPLETED],
[20211025205747__deltacommit__COMPLETED],
[20211025205845__rollback__COMPLETED],
[20211025205856__deltacommit__COMPLETED],
[20211025205945__rollback__COMPLETED],
[20211025205951__deltacommit__COMPLETED],
[20211025210045__rollback__COMPLETED], [20211025210046__commit__COMPLETED],
[20211025210047__deltacommit__COMPLETED],
[20211025210147__rollback__COMPLETED],
[20211025210205__deltacommit__COMPLETED],
[20211025210257__deltacommit__COMPLETED], [20211025210338__clean__COMPLETED],
[20211025210347__rollback__COMPLETED],
[20211025210426__deltacommit__COMPLETED],
[20211025210427__rollback__COMPLETED], [20211025210541__rollback__COMPLETED],
[20211025210543__deltacommit__COMPLETED], [20211025210603__clean__COMPLETED],
[20211025210647__commit__COMPLETED], [20211025210647__rollback__COMPLETED],
[20211025210648__deltacommit__COMPLETED], [20211025210712__deltacommit__COMPLE
TED], [20211025210811__rollback__COMPLETED],
[20211025210812__deltacommit__COMPLETED],
[20211025210911__rollback__COMPLETED],
[20211025210912__deltacommit__COMPLETED],
[20211025211011__rollback__COMPLETED],
[20211025211013__deltacommit__COMPLETED], [20211025211103__clean__COMPLETED],
[20211025211134__commit__COMPLETED], [20211025211135__deltacommit__COMPLETED],
[20211025211136__rollback__COMPLETED],
[20211025211233__deltacommit__COMPLETED],
[20211025211317__rollback__COMPLETED],
[20211025211318__deltacommit__COMPLETED],
[20211025211414__rollback__COMPLETED],
[20211025211415__deltacommit__COMPLETED],
[20211025211510__rollback__COMPLETED],
[20211025211511__deltacommit__COMPLETED], [20211025211603__clean__COMPLETED],
[20211025211610__commit__COMPLETED], [20211025211611__deltacommit__COMPLETED],
[20211025211611__rollback__COMPLETED],
[20211025211712__deltacommit__COMPLETED],
[20211025211811__rollback__COMPLETED], [20211025211843__rollback__COMPLETED],
[20211025211938__deltacommit__COMPL
ETED], [20211025211939__rollback__COMPLETED],
[20211025212012__rollback__COMPLETED],
[20211025212013__deltacommit__COMPLETED],
[20211025212108__rollback__COMPLETED],
[20211025212109__deltacommit__COMPLETED], [20211025212200__clean__COMPLETED],
[20211025212215__rollback__COMPLETED]]
> at
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
> at
org.apache.hudi.timeline.service.RequestHandler$ViewHandler.handle(RequestHandler.java:510)
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
Environment Description
Flink version : 1.13.2
Hudi version : 0.10.0-SNAPSHOT
Hive version : 2.3.3
Hadoop version : 2.7.3
Storage (HDFS/S3/GCS..) : obs
Running on Docker? (yes/no) : no
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]