Coreixwumo opened a new issue #3999:
URL: https://github.com/apache/hudi/issues/3999


   Flink writing to Hudi with a MOR (merge-on-read) table
   
   Steps to reproduce the behavior:
   1. Create a Kafka table.
   
   2. Create a Hudi MOR table with the following SQL:
   
   > create table hudi.ods_user_behavior_logic_mor(
   > uuid_did string,
   > content string,
   > client_ip string,
   > userid bigint,
   > visit_time_ts bigint,
   > `event_time` TIMESTAMP(3) COMMENT 'ETL创建时间',
   > `etl_update_time` TIMESTAMP(3) COMMENT 'ETL更新时间',
   > `visit_date` string
   > )PARTITIONED BY (`visit_date`)
   > with (
   > 'connector' = 'hudi'
   > ,'is_generic' = 'true'
   > ,'path' = 'obs://xxx'
   > ,'hoodie.datasource.write.recordkey.field' = 'uuid_did'
   > ,'hoodie.datasource.write.partitionpath.field' = 'visit_date'
   > ,'write.precombine.field' = 'etl_update_time'
   > ,'write.tasks' = '1'
   > ,'table.type' = 'MERGE_ON_READ'
   > ,'index.global.enabled' = 'false'
   > ,'compaction.delta_commits' = '5'
   > ,'compaction.async.enabled' = 'true'
   > ,'compaction.trigger.strategy' = 'num_commits'
   > ,'compaction.delta_seconds' = '90'
   > ,'compaction.tasks' = '1'
   > ,'hive_sync.enable' = 'true'
   > ,'hive_sync.db' = 'hudi'
   > ,'hive_sync.table' = 'ods_user_behavior_mor'
   > ,'hive_sync.username' = 'data'
   > ,'hive_sync.file_format' = 'PARQUET'
   > ,'hive_sync.support_timestamp' = 'true'
   > ,'hive_sync.use_jdbc' = 'true'
   > ,'hive_sync.jdbc_url' = 'jdbc:hive2://xxx:10000'
   > ,'hive_sync.metastore.uris' = 'thrift://xxx:7004'
   > ,'hoodie.datasource.hive_sync.partition_extractor_class' = 
'hudi.DatePartitionExtractor'
   > ,'hoodie.datasource.hive_style_partition' = 'true'
   > ,'hive_sync.partition_fields' = 'visit_date'
   > ,'hive_sync.auto_create_database' = 'true'
   > ,'hive_sync.skip_ro_suffix' = 'false'
   > ,'hive_sync.support_timestamp' = 'false'
   > ,'read.tasks' = '1'
   > ,'read.streaming.enabled' = 'true'
   > ,'hoodie.datasource.query.type' = 'snapshot'
   > ,'read.streaming.check-interval' = '30'
   > ,'hoodie.datasource.merge.type' = 'payload_combine'
   > ,'read.utc-timezone' = 'false'
   > );
   
   3. Select from the Kafka table, then insert into the Hudi table:
   
   > insert into hudi.ods_user_behavior_logic_mor
   > select CONCAT_WS('_', uuid_did, date_format(eventTime, 
'yyyy-MM-dd')),content,client_ip,userid,visit_time_ts,eventTime,
   > now() etl_update_time ,date_format(eventTime, 'yyyy-MM-dd')
   > from hudi.flink_user_behavior_mor
   
   
   I get the following stack traces:
   Type 1:
   
   > ERROR org.apache.hudi.sink.compact.CompactionCommitSink            [] - 
Error while committing compaction instant: 20211111151653
   > java.lang.IllegalArgumentException: null
   >    at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:31)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionState(HoodieActiveTimeline.java:459)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionState(HoodieActiveTimeline.java:440)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.transitionCompactionInflightToComplete(HoodieActiveTimeline.java:311)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.table.action.compact.AbstractCompactHelpers.completeInflightCompaction(AbstractCompactHelpers.java:50)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.client.HoodieFlinkWriteClient.completeCompaction(HoodieFlinkWriteClient.java:367)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.client.HoodieFlinkWriteClient.commitCompaction(HoodieFlinkWriteClient.java:352)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.sink.compact.CompactionCommitSink.doCommit(CompactionCommitSink.java:123)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   
   
   Type 2:
   
   > ERROR org.apache.hudi.sink.compact.CompactFunction                 [] - 
Executor executes action [Execute compaction for instant 20211111151653 from 
task 0] error
   > org.apache.hudi.exception.HoodieUpsertException: Failed to initialize 
HoodieUpdateHandle for FileId: f8bd7fbd-a1bf-4b2f-bfa8-a5adc3457789 on commit 
20211111151653 on path obs://xxx/ods_user_behavior_mor
   >    at 
org.apache.hudi.io.HoodieMergeHandle.init(HoodieMergeHandle.java:192) 
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:136) 
~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.getUpdateHandle(HoodieFlinkCopyOnWriteTable.java:365)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdate(HoodieFlinkCopyOnWriteTable.java:336)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.table.action.compact.HoodieFlinkMergeOnReadTableCompactor.compact(HoodieFlinkMergeOnReadTableCompactor.java:146)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.table.action.compact.FlinkCompactHelpers.compact(FlinkCompactHelpers.java:95)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.sink.compact.CompactFunction.doCompaction(CompactFunction.java:101)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.sink.compact.CompactFunction.lambda$processElement$0(CompactFunction.java:91)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
[?:1.8.0_191]
   >    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
[?:1.8.0_191]
   >    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_191]
   
   >    Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: 
obs://xxx.parquet already exists
   >    at 
org.apache.hadoop.fs.obs.OBSFileSystem.create(OBSFileSystem.java:873) 
~[hadoop-huaweicloud-2.8.3-hw-37.jar:?]
   >    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:929) 
~[flink-shaded-hadoop-2-2.8.3-10.0.jar:2.8.3-10.0]
   >    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:910) 
~[flink-shaded-hadoop-2-2.8.3-10.0.jar:2.8.3-10.0]
   >    at 
org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$create$12(HoodieWrapperFileSystem.java:300)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:100)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.common.fs.HoodieWrapperFileSystem.create(HoodieWrapperFileSystem.java:298)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:295)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:283)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:227)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.io.storage.HoodieParquetWriter.<init>(HoodieParquetWriter.java:56)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   
   
   Type 3:
   
   > ERROR org.apache.hudi.timeline.service.RequestHandler              [] - 
Got runtime exception servicing request 
basepath=obs://xxx/ods_user_behavior&lastinstantts=20211025212215&timelinehash=b5e2d2477ff5ba3e459697463d4402d1246d1df6f13c31e0509bfcbed21cbd6f
   > java.lang.IllegalArgumentException: Last known instant from client was 
20211025212215 but server has the following timeline 
[[20211025190555__clean__COMPLETED], [20211025191055__clean__COMPLETED], 
[20211025191555__clean__COMPLETED], [20211025192055__clean__COMPLETED], 
[20211025192555__clean__COMPLETED], [20211025193055__clean__COMPLETED], 
[20211025193555__clean__COMPLETED], [20211025194055__clean__COMPLETED], 
[20211025194555__clean__COMPLETED], [20211025195055__clean__COMPLETED], 
[20211025195555__clean__COMPLETED], [20211025200055__clean__COMPLETED], 
[20211025200555__clean__COMPLETED], [20211025201055__clean__COMPLETED], 
[20211025201555__clean__COMPLETED], [20211025202055__clean__COMPLETED], 
[20211025202555__clean__COMPLETED], [20211025203055__clean__COMPLETED], 
[20211025203634__clean__COMPLETED], [==>20211025203641__compaction__REQUESTED], 
[20211025203642__deltacommit__COMPLETED], 
[20211025203757__deltacommit__COMPLETED], 
[20211025203948__deltacommit__COMPLETED], [2021102520401
 8__deltacommit__COMPLETED], [20211025204111__deltacommit__COMPLETED], 
[20211025204157__deltacommit__COMPLETED], [20211025204234__clean__COMPLETED], 
[20211025204255__deltacommit__COMPLETED], 
[20211025204408__deltacommit__COMPLETED], 
[20211025204454__deltacommit__COMPLETED], 
[20211025204543__deltacommit__COMPLETED], 
[20211025204644__deltacommit__COMPLETED], 
[20211025204741__deltacommit__COMPLETED], 
[20211025204841__rollback__COMPLETED], 
[20211025204843__deltacommit__COMPLETED], 
[20211025204941__rollback__COMPLETED], 
[20211025204950__deltacommit__COMPLETED], 
[20211025205041__deltacommit__COMPLETED], 
[20211025205041__rollback__COMPLETED], [20211025205142__rollback__COMPLETED], 
[20211025205312__deltacommit__COMPLETED], 
[20211025205313__rollback__COMPLETED], [20211025205351__rollback__COMPLETED], 
[20211025205352__deltacommit__COMPLETED], [20211025205445__commit__COMPLETED], 
[20211025205445__rollback__COMPLETED], 
[20211025205446__deltacommit__COMPLETED], [20211025205545__commit__COMPLETED]
 , [20211025205546__deltacommit__COMPLETED], 
[20211025205546__rollback__COMPLETED], 
[20211025205646__deltacommit__COMPLETED], 
[20211025205747__deltacommit__COMPLETED], 
[20211025205845__rollback__COMPLETED], 
[20211025205856__deltacommit__COMPLETED], 
[20211025205945__rollback__COMPLETED], 
[20211025205951__deltacommit__COMPLETED], 
[20211025210045__rollback__COMPLETED], [20211025210046__commit__COMPLETED], 
[20211025210047__deltacommit__COMPLETED], 
[20211025210147__rollback__COMPLETED], 
[20211025210205__deltacommit__COMPLETED], 
[20211025210257__deltacommit__COMPLETED], [20211025210338__clean__COMPLETED], 
[20211025210347__rollback__COMPLETED], 
[20211025210426__deltacommit__COMPLETED], 
[20211025210427__rollback__COMPLETED], [20211025210541__rollback__COMPLETED], 
[20211025210543__deltacommit__COMPLETED], [20211025210603__clean__COMPLETED], 
[20211025210647__commit__COMPLETED], [20211025210647__rollback__COMPLETED], 
[20211025210648__deltacommit__COMPLETED], [20211025210712__deltacommit__COMPLE
 TED], [20211025210811__rollback__COMPLETED], 
[20211025210812__deltacommit__COMPLETED], 
[20211025210911__rollback__COMPLETED], 
[20211025210912__deltacommit__COMPLETED], 
[20211025211011__rollback__COMPLETED], 
[20211025211013__deltacommit__COMPLETED], [20211025211103__clean__COMPLETED], 
[20211025211134__commit__COMPLETED], [20211025211135__deltacommit__COMPLETED], 
[20211025211136__rollback__COMPLETED], 
[20211025211233__deltacommit__COMPLETED], 
[20211025211317__rollback__COMPLETED], 
[20211025211318__deltacommit__COMPLETED], 
[20211025211414__rollback__COMPLETED], 
[20211025211415__deltacommit__COMPLETED], 
[20211025211510__rollback__COMPLETED], 
[20211025211511__deltacommit__COMPLETED], [20211025211603__clean__COMPLETED], 
[20211025211610__commit__COMPLETED], [20211025211611__deltacommit__COMPLETED], 
[20211025211611__rollback__COMPLETED], 
[20211025211712__deltacommit__COMPLETED], 
[20211025211811__rollback__COMPLETED], [20211025211843__rollback__COMPLETED], 
[20211025211938__deltacommit__COMPL
 ETED], [20211025211939__rollback__COMPLETED], 
[20211025212012__rollback__COMPLETED], 
[20211025212013__deltacommit__COMPLETED], 
[20211025212108__rollback__COMPLETED], 
[20211025212109__deltacommit__COMPLETED], [20211025212200__clean__COMPLETED], 
[20211025212215__rollback__COMPLETED]]
   >    at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   >    at 
org.apache.hudi.timeline.service.RequestHandler$ViewHandler.handle(RequestHandler.java:510)
 ~[flink-batch-1.0-SNAPSHOT-jar-with-dependencies.jar:?]
   
   
   Environment Description
   
   Flink version : 1.13.2 
   
   Hudi version : 0.10.0-SNAPSHOT
   
   Hive version : 2.3.3
   
   Hadoop version : 2.7.3
   
   Storage (HDFS/S3/GCS..) : obs
   
   Running on Docker? (yes/no) : no
   
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to