waywtdcc opened a new issue #4508:
URL: https://github.com/apache/hudi/issues/4508


   
   
   **Describe the problem you faced**
   
   Flink writes duplicate records into the Hudi table even though `id` is declared as the primary key (upsert semantics expected).
   
   **To Reproduce**
   
   Steps to reproduce the behavior:
   
   -- Step 1: datagen source table emitting 20 rows per second,
   -- with id values between 1 and 1,000,000 (so keys repeat over time).
   CREATE TABLE hudi.datagen_test3 (
       id BIGINT,
       name1 VARCHAR(10),
       name2 VARCHAR(10),
       name3 VARCHAR(10),
       name4 VARCHAR(10),
       name5 VARCHAR(10),
       name6 VARCHAR(10),
       name7 VARCHAR(10),
       name8 VARCHAR(10),
       name9 VARCHAR(10),
       name10 VARCHAR(10),
       name11 VARCHAR(10),
       name12 VARCHAR(10),
       name13 VARCHAR(10),
       name14 VARCHAR(10),
       name15 VARCHAR(10),
       name16 VARCHAR(10),
       name17 VARCHAR(10),
       name18 VARCHAR(10),
       name19 VARCHAR(10),
       name20 VARCHAR(10),
       name21 VARCHAR(10),
       name22 VARCHAR(10),
       name23 VARCHAR(10),
       name24 VARCHAR(10),
       name25 VARCHAR(10),
       name26 VARCHAR(10),
       name27 VARCHAR(10),
       name28 VARCHAR(10),
       name29 VARCHAR(10),
       name30 VARCHAR(10),
       name31 VARCHAR(10),
       name32 VARCHAR(10),
       name33 VARCHAR(10),
       name34 VARCHAR(10),
       name35 VARCHAR(10),
       name36 VARCHAR(10),
       name37 VARCHAR(10),
       name38 VARCHAR(10),
       name39 VARCHAR(10),
       name40 VARCHAR(10),
       name41 VARCHAR(10),
       name42 VARCHAR(10),
       name43 VARCHAR(10),
       name44 VARCHAR(10),
       name45 VARCHAR(10),
       name46 VARCHAR(10),
       name47 VARCHAR(10),
       name48 VARCHAR(10),
       name49 VARCHAR(10),
       name50 VARCHAR(10),
       name VARCHAR(20),
       age INT,
       birthday TIMESTAMP(3),
       ts TIMESTAMP(3)
   ) WITH (
       'connector' = 'datagen',
       'rows-per-second' = '20',
       'fields.id.min' = '1',
       'fields.id.max' = '1000000'
   );
   2.
   -- Step 2: Hudi MERGE_ON_READ sink table, partitioned by `partition_str`,
   -- with async compaction and Hive (HMS) metastore sync enabled.
   -- NOTE(review): global index + index bootstrap are turned on at the bottom
   -- of the WITH clause — relevant to the duplicate-record symptom.
   CREATE TABLE hudi.datagen_hudi8(
   id bigint ,
   name1        VARCHAR(10),
       name2        VARCHAR(10),
       name3        VARCHAR(10),
       name4        VARCHAR(10),
       name5        VARCHAR(10),
       name6        VARCHAR(10),
       name7        VARCHAR(10),
       name8        VARCHAR(10),
       name9        VARCHAR(10),
       name10        VARCHAR(10),
       name11        VARCHAR(10),
       name12        VARCHAR(10),
       name13        VARCHAR(10),
       name14        VARCHAR(10),
       name15        VARCHAR(10),
       name16        VARCHAR(10),
       name17        VARCHAR(10),
       name18        VARCHAR(10),
       name19        VARCHAR(10),
       name20        VARCHAR(10),
       name21        VARCHAR(10),
       name22        VARCHAR(10),
       name23        VARCHAR(10),
       name24        VARCHAR(10),
       name25        VARCHAR(10),
       name26        VARCHAR(10),
       name27        VARCHAR(10),
       name28        VARCHAR(10),
       name29        VARCHAR(10),
       name30        VARCHAR(10),
       name31        VARCHAR(10),
       name32        VARCHAR(10),
       name33        VARCHAR(10),
       name34        VARCHAR(10),
       name35        VARCHAR(10),
       name36        VARCHAR(10),
       name37        VARCHAR(10),
       name38        VARCHAR(10),
       name39        VARCHAR(10),
       name40        VARCHAR(10),
       name41        VARCHAR(10),
       name42        VARCHAR(10),
       name43        VARCHAR(10),
       name44        VARCHAR(10),
       name45        VARCHAR(10),
       name46        VARCHAR(10),
       name47        VARCHAR(10),
       name48        VARCHAR(10),
       name49        VARCHAR(10),
       name50        VARCHAR(10),
       name        VARCHAR(20),

   birthday TIMESTAMP(3),

   ts TIMESTAMP(3),

   `partition_str` VARCHAR(20),

   primary key(id) not enforced -- a unique primary key (uuid) must be specified

   )

   PARTITIONED BY (`partition_str`)

   with(

   'connector'='hudi',

   'path'= 'hdfs://test/user/hive/warehouse/hudi.db/datagen_hudi8'

   , 'hoodie.datasource.write.recordkey.field'= 'id'-- record/primary key field

   , 'write.tasks'= '1'

   , 'compaction.tasks'= '1'
   , 'write.precombine.field'= 'ts'-- field used for automatic precombine (dedup ordering)

   , 'table.type'= 'MERGE_ON_READ'-- default is COPY_ON_WRITE; MERGE_ON_READ is optional

   , 'compaction.async.enabled'= 'true'-- whether to enable async compaction

   , 'compaction.trigger.strategy'= 'num_or_time'

   , 'compaction.delta_commits'= '2',   -- default is 5
   'compaction.delta_seconds' = '120',
     'hive_sync.enable' = 'true', 
     'hive_sync.mode' = 'hms'    ,
     'hive_sync.metastore.uris' = 'thrift://test:53083',
       'hive_sync.table'='datagen_hudi8',                         
     'hive_sync.db'='hudi'  ,
     'index.global.enabled' = 'true',
     'index.bootstrap.enabled' = 'true'
   );
   -- Step 3: continuously upsert the generated rows into the Hudi table;
   -- the partition value is derived from birthday formatted as yyyyMMdd.
   -- (The source's `age` column is intentionally not selected.)
   3. INSERT INTO hudi.datagen_hudi8
   SELECT
       id,
       name1,
       name2,
       name3,
       name4,
       name5,
       name6,
       name7,
       name8,
       name9,
       name10,
       name11,
       name12,
       name13,
       name14,
       name15,
       name16,
       name17,
       name18,
       name19,
       name20,
       name21,
       name22,
       name23,
       name24,
       name25,
       name26,
       name27,
       name28,
       name29,
       name30,
       name31,
       name32,
       name33,
       name34,
       name35,
       name36,
       name37,
       name38,
       name39,
       name40,
       name41,
       name42,
       name43,
       name44,
       name45,
       name46,
       name47,
       name48,
       name49,
       name50,
       name,
       birthday,
       ts,
       DATE_FORMAT(birthday, 'yyyyMMdd') AS `partition_str`
   FROM hudi.datagen_test3;
   4.
   
   **Expected behavior**
   
   Since `id` is declared as the primary key (upsert semantics), each `id` should appear at most once when querying the Hudi table; instead, duplicate rows for the same `id` are observed.
   
   **Environment Description**
   
   * Hudi version : 0.9.0
   
   * Flink version :1.12.2
   
   * Hadoop version : 2.7.7
   
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to