This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit d4a67d93f3e5be963238702dc98753cdf633a988
Author: HHoflittlefish777 <[email protected]>
AuthorDate: Wed Apr 10 14:53:51 2024 +0800

    [improve](routine-load) timely pause job if Kafka cluster exception when consume (#33372)
---
 .../org/apache/doris/load/routineload/KafkaRoutineLoadJob.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
index bdcfb9e4a27..8540bb43963 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java
@@ -746,7 +746,13 @@ public class KafkaRoutineLoadJob extends RoutineLoadJob {
                     cachedPartitionWithLatestOffsets.put(pair.first, pair.second);
                 }
             } catch (Exception e) {
-                LOG.warn("failed to get latest partition offset. {}", e.getMessage(), e);
+                // It needs to pause job when can not get partition meta.
+                // To ensure the stability of the routine load,
+                // the scheduler will automatically pull up routine load job in this scenario,
+                // to avoid some network and Kafka exceptions causing the routine load job to stop
+                updateState(JobState.PAUSED, new ErrorReason(InternalErrorCode.PARTITIONS_ERR,
+                        "failed to get latest partition offset. {}" + e.getMessage()),
+                        false /* not replay */);
                 return false;
             }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
