This is an automated email from the ASF dual-hosted git repository.
kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new a2254b47275 Fix off-by-two with task action retry count. (#18755)
a2254b47275 is described below
commit a2254b472759a3060c2dab2c1099dcf8df4270c8
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Nov 20 20:26:56 2025 -0800
Fix off-by-two with task action retry count. (#18755)
The property druid.peon.taskActionClient.retry.maxRetryCount was
being decremented prior to being passed to StandardRetryPolicy as
the value of maxAttempts. It should actually be incremented: if there
are N retries that means N + 1 total attempts.
---
.../actions/RemoteTaskActionClientFactory.java | 23 ++++--
.../actions/RemoteTaskActionClientFactoryTest.java | 85 ++++++++++++++++++++++
2 files changed, 103 insertions(+), 5 deletions(-)
diff --git
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
index 37bf503118e..50e6243c906 100644
---
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
+++
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
@@ -33,6 +33,7 @@ import org.apache.druid.rpc.ServiceLocator;
import org.apache.druid.rpc.StandardRetryPolicy;
/**
+ *
*/
public class RemoteTaskActionClientFactory implements TaskActionClientFactory
{
@@ -50,11 +51,7 @@ public class RemoteTaskActionClientFactory implements
TaskActionClientFactory
this.overlordClient = clientFactory.makeClient(
NodeRole.OVERLORD.toString(),
serviceLocator,
- StandardRetryPolicy.builder()
- .maxAttempts(retryPolicyConfig.getMaxRetryCount() - 1)
-
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
-
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
- .build()
+ buildRetryPolicy(retryPolicyConfig)
);
this.jsonMapper = jsonMapper;
}
@@ -64,4 +61,20 @@ public class RemoteTaskActionClientFactory implements
TaskActionClientFactory
{
return new RemoteTaskActionClient(task, overlordClient, jsonMapper);
}
+
+ /**
+ * Converts a {@link RetryPolicyConfig} to a {@link StandardRetryPolicy}.
+ *
+ * @param retryPolicyConfig the retry policy configuration
+ *
+ * @return the standard retry policy
+ */
+ static StandardRetryPolicy buildRetryPolicy(final RetryPolicyConfig
retryPolicyConfig)
+ {
+ return StandardRetryPolicy.builder()
+
.maxAttempts(retryPolicyConfig.getMaxRetryCount() + 1)
+
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
+
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
+ .build();
+ }
}
diff --git
a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
new file mode 100644
index 00000000000..0c504c53687
--- /dev/null
+++
b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.common.actions;
+
+import org.apache.druid.indexing.common.RetryPolicyConfig;
+import org.apache.druid.rpc.StandardRetryPolicy;
+import org.joda.time.Period;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RemoteTaskActionClientFactoryTest
+{
+ @Test
+ public void test_buildRetryPolicy_withDefaultConfig()
+ {
+ final RetryPolicyConfig config = new RetryPolicyConfig();
+ final StandardRetryPolicy retryPolicy =
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+ // Default maxRetryCount is 13, so maxAttempts should be 14 (13 retries +
1 initial attempt)
+ Assert.assertEquals(14, retryPolicy.maxAttempts());
+
+ // Default minWait is PT5S (5 seconds)
+ Assert.assertEquals(5000, retryPolicy.minWaitMillis());
+
+ // Default maxWait is PT1M (1 minute)
+ Assert.assertEquals(60000, retryPolicy.maxWaitMillis());
+ }
+
+ @Test
+ public void test_buildRetryPolicy_withCustomConfig()
+ {
+ final RetryPolicyConfig config = new RetryPolicyConfig()
+ .setMaxRetryCount(5)
+ .setMinWait(new Period("PT10S"))
+ .setMaxWait(new Period("PT2M"));
+
+ final StandardRetryPolicy retryPolicy =
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+ // maxRetryCount is 5, so maxAttempts should be 6 (5 retries + 1 initial
attempt)
+ Assert.assertEquals(6, retryPolicy.maxAttempts());
+
+ // minWait is PT10S (10 seconds)
+ Assert.assertEquals(10000, retryPolicy.minWaitMillis());
+
+ // maxWait is PT2M (2 minutes)
+ Assert.assertEquals(120000, retryPolicy.maxWaitMillis());
+ }
+
+ @Test
+ public void test_buildRetryPolicy_withZeroRetries()
+ {
+ final RetryPolicyConfig config = new RetryPolicyConfig()
+ .setMaxRetryCount(0)
+ .setMinWait(new Period("PT1S"))
+ .setMaxWait(new Period("PT30S"));
+
+ final StandardRetryPolicy retryPolicy =
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+ // maxRetryCount is 0, so maxAttempts should be 1 (0 retries + 1 initial
attempt)
+ Assert.assertEquals(1, retryPolicy.maxAttempts());
+
+ // minWait is PT1S (1 second)
+ Assert.assertEquals(1000, retryPolicy.minWaitMillis());
+
+ // maxWait is PT30S (30 seconds)
+ Assert.assertEquals(30000, retryPolicy.maxWaitMillis());
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]