ocean-zhc opened a new issue, #5060:
URL: https://github.com/apache/seatunnel/issues/5060

   ### Search before asking
   
   - [X] I have searched in the [issues](https://github.com/apache/seatunnel/issues?q=is%3Aissue+label%3A%22bug%22) and found no similar issues.
   
   
   ### What happened
   
   Kafka source 配置了 HDFS 的 checkpoint(检查点),但不生效。
   
   
   <img width="1140" alt="image" 
src="https://github.com/apache/seatunnel/assets/46189785/8774e129-9be0-4683-a72b-9221e2212af6">
   
   程序中断后再重新启动,在 MySQL sink 不设置 primary_keys 参数的情况下,数据会重复插入;查看日志,offset 会从 0 开始重复消费。
   
   ### SeaTunnel Version
   
   dev分支:2.3.2-SNAPSHOT
   
   ### SeaTunnel Config
   
   ```conf
   env {
       parallelism = 5
       job.mode = "STREAMING"
       job.name = "DZFP_DZDZ_DZDZ_FJXX_PTFP"
       checkpoint.interval = "60000"
       checkpoint.data-uri = "hdfs://localhost:8020/flink-checKpoints"
   }
   
   source {
       Kafka {
           result_table_name = "DZFP_DZDZ_DZDZ_FJXX_PTFP1"
           topic = "DZFP_DZDZ_DZDZ_FJXX_PTFP"
           bootstrap.servers = "localhost:9200"
           commit_on_checkpoint = "true"
           format_error_handle_way = "skip"
           schema = {
               fields {
                    message =  {
                       data = "map<string,string>"
                    }
               }  
           }
           #semantics = EXACTLY_ONCE
           start_mode = "earliest"
           #start_mode.offsets = {
           #     0 = 0
           #     1 = 0
           #     2 = 0
           #}   
           kafka.config = {
               auto.offset.reset = "earliest"
               enable.auto.commit = "true"
               # max.poll.interval.ms = 30000000
               #max.partition.fetch.bytes = "5242880"
               #session.timeout.ms = "30000"
               request.timeout.ms = "60000"
               buffer.memory = "33554432"
               #client.id = client_1
               max.poll.records = "100000"
               
           }
       }
   }
   transform {
       sql{
           source_table_name = "DZFP_DZDZ_DZDZ_FJXX_PTFP1"
           result_table_name = "DZFP_DZDZ_DZDZ_FJXX_PTFP2"
           # qdmx自定义UDF函数
           query = "select qdmx(message,'xsfdzdh') as 
xsfdzdh,qdmx(message,'gf_qxswjg_dm') as gf_qxswjg_dm,qdmx(message,'sblx_dm') as 
sblx_dm,qdmx(message,'gmfdzdh') as gmfdzdh,qdmx(message,'hzxxb_bh') as 
hzxxb_bh,qdmx(message,'mac') as mac,qdmx(message,'sjczbs') as 
sjczbs,qdmx(message,'xsfyhzh') as xsfyhzh,qdmx(message,'cxswjg_dm') as 
cxswjg_dm,qdmx(message,'fpbb_dm') as fpbb_dm,qdmx(message,'skm') as 
skm,qdmx(message,'kpjh') as kpjh,qdmx(message,'kpr') as kpr,qdmx(message,'ydk') 
as ydk,qdmx(message,'jsswjg_dm') as jsswjg_dm,qdmx(message,'ydfpbz') as 
ydfpbz,qdmx(message,'skr') as skr,qdmx(message,'fpqm') as 
fpqm,qdmx(message,'xf_qxswjg_dm') as xf_qxswjg_dm,qdmx(message,'jsr_mc') as 
jsr_mc,qdmx(message,'xdjg') as xdjg,qdmx(message,'tslsh') as 
tslsh,qdmx(message,'fpdm') as fpdm,qdmx(message,'cslx_dm') as 
cslx_dm,qdmx(message,'sjtbsj') as sjtbsj,qdmx(message,'fpdmhm') as 
fpdmhm,qdmx(message,'fhr') as fhr,qdmx(message,'gf_sjswjg_dm') as 
gf_sjswjg_dm,qdmx(message,'fphm') as fphm,qdmx(message,'jsr_dm') as jsr_dm,
qdmx(message,'zbxlh') as 
zbxlh,qdmx(message,'bs_nsrsbh') as bs_nsrsbh,qdmx(message,'gf_dsswjg_dm') as 
gf_dsswjg_dm,qdmx(message,'bdbz') as bdbz,qdmx(message,'xf_sjswjg_dm') as 
xf_sjswjg_dm,qdmx(message,'data_category') as 
data_category,qdmx(message,'gmfyhzh') as gmfyhzh,qdmx(message,'swjg_dm') as 
swjg_dm,qdmx(message,'sjbb') as sjbb,qdmx(message,'tspz_dm') as 
tspz_dm,qdmx(message,'wspzh') as wspzh,qdmx(message,'sbbh') as 
sbbh,qdmx(message,'yqbz') as yqbz,qdmx(message,'ip') as ip,qdmx(message,'kpyf') 
as kpyf,qdmx(message,'xf_dsswjg_dm') as xf_dsswjg_dm,qdmx(message,'sslkjly') as 
sslkjly from DZFP_DZDZ_DZDZ_FJXX_PTFP1"
       }
   }
   sink {
       #Console {
       #    source_table_name = "DZFP_DZDZ_DZDZ_FJXX_PTFP2"
       #}
        jdbc {
           source_table_name = "DZFP_DZDZ_DZDZ_FJXX_PTFP2"
           url = "jdbc:mysql://192.168.202.30:3306/sjjcb_dw?serverTimezone=GMT%2b8&rewriteBatchedStatements=true"
           driver = "com.mysql.cj.jdbc.Driver"
           user = "user"
           password = "pwd"
           batch_size = "200000"
           database = "dbname"
           table = "tablename"
           generate_sink_sql = true
           #is_exactly_once = "true"
           #xa_data_source_class_name = "com.mysql.cj.jdbc.MysqlXADataSource"
           primary_keys = ["fpdmhm"]
       }
   }
   ```
   
   
   ### Running Command
   
   ```shell
   ./bin/seatunnel.sh --config config/qdmx_config/DZFP_DZDZ_DZDZ_FJXX_PTFP.conf -e local
   ```
   
   
   ### Error Exception
   
   ```log
   no
   ```
   
   
   ### Flink or Spark Version
   
   no
   
   ### Java or Scala Version
   
   _No response_
   
   ### Screenshots
   
   1.8.0_172
   
   ### Are you willing to submit PR?
   
   - [X] Yes I am willing to submit a PR!
   
   ### Code of Conduct
   
   - [X] I agree to follow this project's [Code of 
Conduct](https://www.apache.org/foundation/policies/conduct)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to