This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d4a67d93f3e5be963238702dc98753cdf633a988
Author: HHoflittlefish777 <[email protected]>
AuthorDate: Wed Apr 10 14:53:51 2024 +0800

    [improve](routine-load) timely pause job if Kafka cluster exception when 
consume (#33372)
---
 .../org/apache/doris/load/routineload/KafkaRoutineLoadJob.java    | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
index bdcfb9e4a27..8540bb43963 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
@@ -746,7 +746,13 @@ public class KafkaRoutineLoadJob extends RoutineLoadJob {
                 cachedPartitionWithLatestOffsets.put(pair.first, pair.second);
             }
         } catch (Exception e) {
-            LOG.warn("failed to get latest partition offset. {}", 
e.getMessage(), e);
+            // It needs to pause job when can not get partition meta.
+            // To ensure the stability of the routine load,
+            // the scheduler will automatically pull up routine load job in 
this scenario,
+            // to avoid some network and Kafka exceptions causing the routine 
load job to stop
+            updateState(JobState.PAUSED, new 
ErrorReason(InternalErrorCode.PARTITIONS_ERR,
+                        "failed to get latest partition offset. {}" + 
e.getMessage()),
+                        false /* not replay */);
             return false;
         }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to