[
https://issues.apache.org/jira/browse/GOBBLIN-1840?focusedWorklogId=868261&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-868261
]
ASF GitHub Bot logged work on GOBBLIN-1840:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 28/Jun/23 21:08
Start Date: 28/Jun/23 21:08
Worklog Time Spent: 10m
Work Description: ZihanLi58 commented on code in PR #3704:
URL: https://github.com/apache/gobblin/pull/3704#discussion_r1245763554
##########
gobblin-cluster/src/main/java/org/apache/gobblin/cluster/GobblinThrottlingHelixJobLauncherListener.java:
##########
@@ -0,0 +1,79 @@
+package org.apache.gobblin.cluster;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.runtime.JobContext;
+import org.apache.gobblin.runtime.JobState;
+
+
+/**
+ * A job listener used when {@link GobblinHelixJobLauncher} launches a job.
+ * In {@link GobblinHelixJobScheduler}, when throttling is enabled, this
+ * listener would record jobName to next schedulable time to decide whether
+ * the replanning should be executed or skipped.
+ */
+@Slf4j
+public class GobblinThrottlingHelixJobLauncherListener extends
GobblinHelixJobLauncherListener {
+
+ public final static Logger LOG =
LoggerFactory.getLogger(GobblinThrottlingHelixJobLauncherListener.class);
+ private ConcurrentHashMap<String, Instant> jobNameToNextSchedulableTime;
+ private Duration helixJobSchedulingThrottleTimeout;
+ private Clock clock;
+
+ public
GobblinThrottlingHelixJobLauncherListener(GobblinHelixJobLauncherMetrics
jobLauncherMetrics,
+ ConcurrentHashMap<String, Instant> jobNameToNextSchedulableTime,
Duration helixJobSchedulingThrottleTimeout, Clock clock) {
+ super(jobLauncherMetrics);
+ this.jobNameToNextSchedulableTime = jobNameToNextSchedulableTime;
+ this.helixJobSchedulingThrottleTimeout = helixJobSchedulingThrottleTimeout;
+ this.clock = clock;
+ }
+
+ @Override
+ public void onJobPrepare(JobContext jobContext)
+ throws Exception {
+ super.onJobPrepare(jobContext);
+ Instant nextSchedulableTime =
clock.instant().plus(helixJobSchedulingThrottleTimeout);
+ jobNameToNextSchedulableTime.put(jobContext.getJobName(),
nextSchedulableTime);
+ LOG.info("{} finished preparing. The next schedulable time is {}",
jobContext.getJobName(), nextSchedulableTime);
+ }
+
+ @Override
+ public void onJobStart(JobContext jobContext)
+ throws Exception {
+ super.onJobStart(jobContext);
+ Instant nextSchedulableTime =
clock.instant().plus(helixJobSchedulingThrottleTimeout);
+ jobNameToNextSchedulableTime.put(jobContext.getJobName(),
nextSchedulableTime);
+ LOG.info("{} has started. The next schedulable time is {}",
jobContext.getJobName(), nextSchedulableTime);
+ }
+
+ @Override
+ public void onJobCompletion(JobContext jobContext)
+ throws Exception {
+ super.onJobCompletion(jobContext);
+ if (jobContext.getJobState().getState() == JobState.RunningState.FAILED) {
+ jobNameToNextSchedulableTime.put(jobContext.getJobName(), Instant.EPOCH);
+ LOG.info("{} failed. The next schedulable time is {} so that any future
schedule attempts will be allowed.",
Review Comment:
"{} failed. -> {} completed?
##########
gobblin-cluster/src/main/java/org/apache/gobblin/cluster/GobblinHelixJobScheduler.java:
##########
@@ -303,36 +342,66 @@ public Object get(long timeout, TimeUnit unit) throws
InterruptedException, Exec
}
@Subscribe
- public void handleNewJobConfigArrival(NewJobConfigArrivalEvent
newJobArrival) {
- String jobUri = newJobArrival.getJobName();
- LOGGER.info("Received new job configuration of job " + jobUri);
+ public synchronized void handleNewJobConfigArrival(NewJobConfigArrivalEvent
newJobArrival) {
+ String jobName = newJobArrival.getJobName();
+ LOGGER.info("Received new job configuration of job " + jobName);
+
+ Instant nextSchedulableTime =
jobNameToNextSchedulableTime.getOrDefault(jobName, Instant.EPOCH);
+ if (this.isThrottleEnabled &&
clock.instant().isBefore(nextSchedulableTime)) {
+ LOGGER.info("Adding new job is skipped for job {}. Current time is {}
and the next schedulable time would be {}",
+ jobName,
+ clock.instant(),
+ nextSchedulableTime
+ );
+ return;
+ }
+ nextSchedulableTime = clock.instant().plus(jobSchedulingThrottleTimeout);
Review Comment:
Should we only add entry to jobNameToNextSchedulableTime when the throttle
is enabled? it is a hash map, where we can easily see memory leak when we not
delete the entry properly
##########
gobblin-cluster/src/main/java/org/apache/gobblin/cluster/GobblinThrottlingHelixJobLauncherListener.java:
##########
@@ -0,0 +1,79 @@
+package org.apache.gobblin.cluster;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.runtime.JobContext;
+import org.apache.gobblin.runtime.JobState;
+
+
+/**
+ * A job listener used when {@link GobblinHelixJobLauncher} launches a job.
+ * In {@link GobblinHelixJobScheduler}, when throttling is enabled, this
+ * listener would record jobName to next schedulable time to decide whether
+ * the replanning should be executed or skipped.
+ */
+@Slf4j
+public class GobblinThrottlingHelixJobLauncherListener extends
GobblinHelixJobLauncherListener {
+
+ public final static Logger LOG =
LoggerFactory.getLogger(GobblinThrottlingHelixJobLauncherListener.class);
+ private ConcurrentHashMap<String, Instant> jobNameToNextSchedulableTime;
+ private Duration helixJobSchedulingThrottleTimeout;
+ private Clock clock;
+
+ public
GobblinThrottlingHelixJobLauncherListener(GobblinHelixJobLauncherMetrics
jobLauncherMetrics,
+ ConcurrentHashMap<String, Instant> jobNameToNextSchedulableTime,
Duration helixJobSchedulingThrottleTimeout, Clock clock) {
+ super(jobLauncherMetrics);
+ this.jobNameToNextSchedulableTime = jobNameToNextSchedulableTime;
+ this.helixJobSchedulingThrottleTimeout = helixJobSchedulingThrottleTimeout;
+ this.clock = clock;
+ }
+
+ @Override
+ public void onJobPrepare(JobContext jobContext)
Review Comment:
Why for the same job, why we try to update the schedulable time three times?
once when we handle the message, once when we prepare the job, once when job
start. This will be confusing reading the log.
If we have concerns about race conditions when we handle messages, can we
only update it when handling a message
Issue Time Tracking
-------------------
Worklog Id: (was: 868261)
Time Spent: 4h 20m (was: 4h 10m)
> Helix Job scheduler should not try to replace running workflow if within
> configured time
> ----------------------------------------------------------------------------------------
>
> Key: GOBBLIN-1840
> URL: https://issues.apache.org/jira/browse/GOBBLIN-1840
> Project: Apache Gobblin
> Issue Type: Improvement
> Reporter: Matthew Ho
> Priority: Major
> Time Spent: 4h 20m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)