YuriyGavrilov commented on issue #8604:
URL: https://github.com/apache/seatunnel/issues/8604#issuecomment-2634935158

   With this config it works fine for about 1200 rows, but after that I get the same error:
   
   # Defining the runtime environment
   env {
     parallelism = 1
     job.mode = "BATCH"
     execution.checkpoint.interval = 180000
   }
   
   source {
     LocalFile {
       schema {
         fields {
           no = string
           answer = string
         }
       }
       path = "/config/2data44.csv"
       file_format_type = "text"
       field_delimiter = "\t"
       skip_header_row_number = 1
       read_columns = ["no", "answer"]
   
       parse_options {
         skip_empty_rows = true
         allow_missing_values = true
         error_handling = "skip_row"
       }
   
     }
   }
   
   
   
   transform {
     LLM {
       model_provider = OPENAI
       inference_columns = ["answer"]
       model = gpt-4o-mini
       # error_handling = "SKIP" 
       timeout = 60000 
       retry_times = 3
   
   
   
       batch_size = 1
       delay_between_requests = 2000
       normalize_input = true
       min_message_length = 5
       api_key = "XXXX" 
       output_column = "llm_result" 
       api_path  = "https://api.XXX.YYY/openai/v1/chat/completions";
   
       error_handling {
         mode = "SKIP"
         default_value = "999"
       }
   
       prompt = "bla bla bla ${answer} bla bla bla "
   
     }
   }
   
   sink {
     LocalFile {
       path = "/config/2data_out.csv"
       file_format_type = "text" 
       field_delimiter = "\t"
       encoding = "UTF-8"
       rolling_policy.size = "128MB"
       data_save_mode = "APPEND_DATA"
       enable_checkpoint = true
       tmp_path = "/config/tmp/sea"
   
       write_options {
         max_rows_in_memory = 100000
         batch_size = 500
         retry_times = 3
         retry_interval = 1000
       }
   
     }
   }
   
   
   
   error:
   ```
   
   2025-02-04 19:53:34,029 INFO  [o.a.s.e.c.j.JobMetricsRunner  ] 
[job-metrics-runner-939242294890987521] - 
   ***********************************************
              Job Progress Information
   ***********************************************
   Job Id                    :  939242294890987521
   Read Count So Far         :                1256
   Write Count So Far        :                1252
   Average Read Count        :                 1/s
   Average Write Count       :                 1/s
   Last Statistic Time       : 2025-02-04 19:52:34
   Current Statistic Time    : 2025-02-04 19:53:34
   ***********************************************
   
   2025-02-04 19:53:39,848 WARN  [o.a.s.e.s.TaskExecutionService] 
[BlockingWorker-TaskGroupLocation{jobId=939242294890987521, pipelineId=1, 
taskGroupId=50000}] - [localhost]:5801 [seatunnel-867441] [5.1] Exception in 
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask@50f03a70
   
org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException:
 ErrorCode:[FILE-08], ErrorDescription:[File read failed] - Read data from this 
file [default.default.default_file:/config/2data44.csv] failed
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:85)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.SourceFlowLifeCycle.collect(SourceFlowLifeCycle.java:159)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask.collect(SourceSeaTunnelTask.java:127)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SeaTunnelTask.stateProcess(SeaTunnelTask.java:169)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask.call(SourceSeaTunnelTask.java:132)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.TaskExecutionService$BlockingWorker.run(TaskExecutionService.java:694)
 [seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.TaskExecutionService$NamedTaskWrapper.run(TaskExecutionService.java:1019)
 [seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.api.tracing.MDCRunnable.run(MDCRunnable.java:43) 
[seatunnel-starter.jar:2.3.9]
           at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
[?:1.8.0_342]
           at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
[?:1.8.0_342]
           at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
[?:1.8.0_342]
           at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
[?:1.8.0_342]
           at java.lang.Thread.run(Thread.java:750) [?:1.8.0_342]
   Caused by: java.lang.RuntimeException: Failed to inference model with row 
SeaTunnelRow{tableId=, kind=+I, fields=[2088667, "?"]}
           at 
org.apache.seatunnel.transform.nlpmodel.llm.LLMTransform.getOutputFieldValue(LLMTransform.java:160)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:47)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:35)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractSeaTunnelTransform.transform(AbstractSeaTunnelTransform.java:80)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:42)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:27)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:40)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:28)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.transform(TransformFlowLifeCycle.java:160)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:122)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:43)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.sendRecordToNext(SeaTunnelSourceCollector.java:195)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.collect(SeaTunnelSourceCollector.java:112)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.lambda$readProcess$0(TextReadStrategy.java:133)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) 
~[?:1.8.0_342]
           at java.util.stream.SliceOps$1$1.accept(SliceOps.java:204) 
~[?:1.8.0_342]
           at java.util.Iterator.forEachRemaining(Iterator.java:116) 
~[?:1.8.0_342]
           at 
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
 ~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) 
~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) 
~[?:1.8.0_342]
           at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150) 
~[?:1.8.0_342]
           at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
 ~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) 
~[?:1.8.0_342]
           at 
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485) 
~[?:1.8.0_342]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.readProcess(TextReadStrategy.java:104)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.AbstractReadStrategy.resolveArchiveCompressedInputStream(AbstractReadStrategy.java:268)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.read(TextReadStrategy.java:71)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:81)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           ... 12 more
   Caused by: java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
           at java.util.ArrayList.rangeCheck(ArrayList.java:659) ~[?:1.8.0_342]
           at java.util.ArrayList.get(ArrayList.java:435) ~[?:1.8.0_342]
           at 
org.apache.seatunnel.transform.nlpmodel.llm.LLMTransform.getOutputFieldValue(LLMTransform.java:145)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:47)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:35)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractSeaTunnelTransform.transform(AbstractSeaTunnelTransform.java:80)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:42)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:27)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:40)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:28)
 ~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.transform(TransformFlowLifeCycle.java:160)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:122)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:43)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.sendRecordToNext(SeaTunnelSourceCollector.java:195)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.collect(SeaTunnelSourceCollector.java:112)
 ~[seatunnel-starter.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.lambda$readProcess$0(TextReadStrategy.java:133)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) 
~[?:1.8.0_342]
           at java.util.stream.SliceOps$1$1.accept(SliceOps.java:204) 
~[?:1.8.0_342]
           at java.util.Iterator.forEachRemaining(Iterator.java:116) 
~[?:1.8.0_342]
           at 
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
 ~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) 
~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) 
~[?:1.8.0_342]
           at 
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150) 
~[?:1.8.0_342]
           at 
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
 ~[?:1.8.0_342]
           at 
java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) 
~[?:1.8.0_342]
           at 
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485) 
~[?:1.8.0_342]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.readProcess(TextReadStrategy.java:104)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.AbstractReadStrategy.resolveArchiveCompressedInputStream(AbstractReadStrategy.java:268)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.read(TextReadStrategy.java:71)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           at 
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:81)
 ~[connector-file-local-2.3.9.jar:2.3.9]
           ... 12 more
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to