liusy-bd commented on issue #10417:
URL: https://github.com/apache/seatunnel/issues/10417#issuecomment-3957683274

   @LeonYoah 
   
[checkpoint_1078936991984254978.zip](https://github.com/user-attachments/files/25540370/checkpoint_1078936991984254978.zip)
   
   
   Using incremental mode, the recovery operation is executed two
minutes after the savepoint is taken.
   This is the complete configuration file.
   ```
   cat > 
/data/workspaces/sea_config_file/STREAMING-ZETA-ZG_ERP-ALL_TABLE.config <<EOF
   env {
     job.mode = "STREAMING"
     parallelism = 1
     checkpoint.interval = 120000      # 120s
     checkpoint.timeout =  120000
     checkpoint.max_concurrent = 1
     read_limit.rows_per_second=2000
   }
   
   source {
     Oracle-CDC {
       plugin_output = "customers"
       url = "jdbc:oracle:thin:@db.erp.xxxx.com:1521/ORCL"
       username = "dbzuser"
       password = "aVnmmpqr7WmVLFlx"
       database-names = ["ORCL"]
       schema-names = ["ZG_ERP"]
       table-names = [
         "ORCL.ZG_ERP.TSALES_ORDER_LOG",
         "ORCL.ZG_ERP.TSALES_ORDER_ITEM"
       ]
       source.reader.close.timeout = 120000
       startup.mode = "LATEST"        # INITIAL 全量后一直接增量 EARLIEST 从能拿到的最早日志位置开始 
只做增量 LATEST 从当前最新日志位置开始
       stop.mode = "never"            # 不自动停止
       exactly_once = true
       schema-changes.enabled = true  # 是否启用模式演进(新增、删除、重命名字段)
       snapshot.split.size = 5000     # 每个 split chunk 大小(行数)
       snapshot.fetch.size = 2048     # 每次 fetch 时批大小,适当调大减小交互延迟
       skip_analyze = true            # 不用 Seatunnel 自己分析表(如果你 DBA 已提前分析好)
       use_select_count=true
       connect.timeout.ms = 60000     # JDBC 连接超时时间
       connect.max-retries = 5
       connection.pool.size = 5       # JDBC 连接池大小(适当调大)
       # 控制 chunk key 的均匀分布阈值
       chunk-key.even-distribution.factor.upper-bound = 200
       chunk-key.even-distribution.factor.lower-bound = 0.01
       sample-sharding.threshold = 2000
       inverse-sampling.rate = 2000
       # dbz参数 优化
       debezium.log.mining.strategy=online_catalog
       debezium.log.mining.continuous.mine=true
     }
   }
   
   
   sink {
     Paimon {
       catalog_name="seatunnel_zg_erp_test"
       catalog_type="hive"
       catalog_uri="thrift://10.80.108.99:7004,thrift://10.80.108.88:7004"
       warehouse = "hdfs:///usr/hive/warehouse"
       database = "ODS_\${schema_name}"
       table = "\${table_name}"
       paimon.hadoop.conf = {
             fs.defaultFS = "hdfs://HDFS9999"
             dfs.nameservices = "HDFS9999"
             dfs.ha.namenodes.HDFS9999 = "nn1,nn2"
             dfs.namenode.rpc-address.HDFS9999.nn1 = "10.80.108.99:4007"
             dfs.namenode.rpc-address.HDFS9999.nn2 = "10.80.108.88:4007"
             dfs.client.failover.proxy.provider.HDFS9999 = 
"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
             dfs.client.use.datanode.hostname = "true"
           }
       paimon.table.write-props = {
             bucket = 1
             write-only = true # 只写入 需开启专用压缩
             target-file-size = 128MB
             write-buffer-spillable = true
             # 开启异步Compaction
             num-sorted-run.stop-trigger = 2147483647 # 极大值,减少写入暂停
             sort-spill-threshold = 10 # 防止内存溢出
             changelog-producer.lookup-wait = false # 禁用同步等待,实现异步
           }
     }
   }
   EOF
   
   ```
   
   **Submit task, execute savepoint, execute recovery**
   
   ```
   $SEATUNNEL_HOME/bin/seatunnel.sh  \
   --async \
   -e cluster \
   --config 
/data/workspaces/sea_config_file/STREAMING-ZETA-ZG_ERP-ALL_TABLE.config \
   -n STREAMING-ZETA-ZG_ERP-ALL_TABLE
   
   
   $SEATUNNEL_HOME/bin/seatunnel.sh -s 1078936991984254978
   
   
   $SEATUNNEL_HOME/bin/seatunnel.sh \
   -c /data/workspaces/sea_config_file/STREAMING-ZETA-ZG_ERP-ALL_TABLE.config  \
   -r 1078936991984254978 \
   -n STREAMING-ZETA-ZG_ERP-ALL_TABLE
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to