YuriyGavrilov commented on issue #8604:
URL: https://github.com/apache/seatunnel/issues/8604#issuecomment-2634935158
With this config it works well for about 1200 rows, but after that I get the same error:
# Defining the runtime environment
env {
parallelism = 1
job.mode = "BATCH"
execution.checkpoint.interval = 180000
}
source {
LocalFile {
schema {
fields {
no = string
answer = string
}
}
path = "/config/2data44.csv"
file_format_type = "text"
field_delimiter = "\t"
skip_header_row_number = 1
read_columns = ["no", "answer"]
parse_options {
skip_empty_rows = true
allow_missing_values = true
error_handling = "skip_row"
}
}
}
transform {
LLM {
model_provider = OPENAI
inference_columns = ["answer"]
model = gpt-4o-mini
# error_handling = "SKIP"
timeout = 60000
retry_times = 3
batch_size = 1
delay_between_requests = 2000
normalize_input = true
min_message_length = 5
api_key = "XXXX"
output_column = "llm_result"
api_path = "https://api.XXX.YYY/openai/v1/chat/completions"
error_handling {
mode = "SKIP"
default_value = "999"
}
prompt = "bla bla bla ${answer} bla bla bla "
}
}
sink {
LocalFile {
path = "/config/2data_out.csv"
file_format_type = "text"
field_delimiter = "\t"
encoding = "UTF-8"
rolling_policy.size = "128MB"
data_save_mode = "APPEND_DATA"
enable_checkpoint = true
tmp_path = "/config/tmp/sea"
write_options {
max_rows_in_memory = 100000
batch_size = 500
retry_times = 3
retry_interval = 1000
}
}
}
error:
```
2025-02-04 19:53:34,029 INFO [o.a.s.e.c.j.JobMetricsRunner ]
[job-metrics-runner-939242294890987521] -
***********************************************
Job Progress Information
***********************************************
Job Id : 939242294890987521
Read Count So Far : 1256
Write Count So Far : 1252
Average Read Count : 1/s
Average Write Count : 1/s
Last Statistic Time : 2025-02-04 19:52:34
Current Statistic Time : 2025-02-04 19:53:34
***********************************************
2025-02-04 19:53:39,848 WARN [o.a.s.e.s.TaskExecutionService]
[BlockingWorker-TaskGroupLocation{jobId=939242294890987521, pipelineId=1,
taskGroupId=50000}] - [localhost]:5801 [seatunnel-867441] [5.1] Exception in
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask@50f03a70
org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException:
ErrorCode:[FILE-08], ErrorDescription:[File read failed] - Read data from this
file [default.default.default_file:/config/2data44.csv] failed
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:85)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.SourceFlowLifeCycle.collect(SourceFlowLifeCycle.java:159)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask.collect(SourceSeaTunnelTask.java:127)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SeaTunnelTask.stateProcess(SeaTunnelTask.java:169)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SourceSeaTunnelTask.call(SourceSeaTunnelTask.java:132)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.TaskExecutionService$BlockingWorker.run(TaskExecutionService.java:694)
[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.TaskExecutionService$NamedTaskWrapper.run(TaskExecutionService.java:1019)
[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.api.tracing.MDCRunnable.run(MDCRunnable.java:43)
[seatunnel-starter.jar:2.3.9]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[?:1.8.0_342]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[?:1.8.0_342]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_342]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_342]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_342]
Caused by: java.lang.RuntimeException: Failed to inference model with row
SeaTunnelRow{tableId=, kind=+I, fields=[2088667, "?"]}
at
org.apache.seatunnel.transform.nlpmodel.llm.LLMTransform.getOutputFieldValue(LLMTransform.java:160)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:47)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:35)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractSeaTunnelTransform.transform(AbstractSeaTunnelTransform.java:80)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:42)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:27)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:40)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:28)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.transform(TransformFlowLifeCycle.java:160)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:122)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:43)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.sendRecordToNext(SeaTunnelSourceCollector.java:195)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.collect(SeaTunnelSourceCollector.java:112)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.lambda$readProcess$0(TextReadStrategy.java:133)
~[connector-file-local-2.3.9.jar:2.3.9]
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
~[?:1.8.0_342]
at java.util.stream.SliceOps$1$1.accept(SliceOps.java:204)
~[?:1.8.0_342]
at java.util.Iterator.forEachRemaining(Iterator.java:116)
~[?:1.8.0_342]
at
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
~[?:1.8.0_342]
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
~[?:1.8.0_342]
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
~[?:1.8.0_342]
at
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
~[?:1.8.0_342]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.readProcess(TextReadStrategy.java:104)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.AbstractReadStrategy.resolveArchiveCompressedInputStream(AbstractReadStrategy.java:268)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.read(TextReadStrategy.java:71)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:81)
~[connector-file-local-2.3.9.jar:2.3.9]
... 12 more
Caused by: java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
at java.util.ArrayList.rangeCheck(ArrayList.java:659) ~[?:1.8.0_342]
at java.util.ArrayList.get(ArrayList.java:435) ~[?:1.8.0_342]
at
org.apache.seatunnel.transform.nlpmodel.llm.LLMTransform.getOutputFieldValue(LLMTransform.java:145)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:47)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.SingleFieldOutputTransform.transformRow(SingleFieldOutputTransform.java:35)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractSeaTunnelTransform.transform(AbstractSeaTunnelTransform.java:80)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:42)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform.map(AbstractCatalogSupportMapTransform.java:27)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:40)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.transform.common.AbstractMultiCatalogMapTransform.map(AbstractMultiCatalogMapTransform.java:28)
~[seatunnel-transforms-v2-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.transform(TransformFlowLifeCycle.java:160)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:122)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.flow.TransformFlowLifeCycle.received(TransformFlowLifeCycle.java:43)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.sendRecordToNext(SeaTunnelSourceCollector.java:195)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.engine.server.task.SeaTunnelSourceCollector.collect(SeaTunnelSourceCollector.java:112)
~[seatunnel-starter.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.lambda$readProcess$0(TextReadStrategy.java:133)
~[connector-file-local-2.3.9.jar:2.3.9]
at
java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
~[?:1.8.0_342]
at java.util.stream.SliceOps$1$1.accept(SliceOps.java:204)
~[?:1.8.0_342]
at java.util.Iterator.forEachRemaining(Iterator.java:116)
~[?:1.8.0_342]
at
java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1801)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
~[?:1.8.0_342]
at
java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
~[?:1.8.0_342]
at
java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
~[?:1.8.0_342]
at
java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
~[?:1.8.0_342]
at
java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
~[?:1.8.0_342]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.readProcess(TextReadStrategy.java:104)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.AbstractReadStrategy.resolveArchiveCompressedInputStream(AbstractReadStrategy.java:268)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.TextReadStrategy.read(TextReadStrategy.java:71)
~[connector-file-local-2.3.9.jar:2.3.9]
at
org.apache.seatunnel.connectors.seatunnel.file.source.reader.MultipleTableFileSourceReader.pollNext(MultipleTableFileSourceReader.java:81)
~[connector-file-local-2.3.9.jar:2.3.9]
... 12 more
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]