This is an automated email from the ASF dual-hosted git repository.

kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new a2254b47275 Fix off-by-two with task action retry count. (#18755)
a2254b47275 is described below

commit a2254b472759a3060c2dab2c1099dcf8df4270c8
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Nov 20 20:26:56 2025 -0800

    Fix off-by-two with task action retry count. (#18755)
    
    The property druid.peon.taskActionClient.retry.maxRetryCount was
    being decremented prior to being passed to StandardRetryPolicy as
    the value of maxAttempts. It should actually be incremented: if there
    are N retries that means N + 1 total attempts.
---
 .../actions/RemoteTaskActionClientFactory.java     | 23 ++++--
 .../actions/RemoteTaskActionClientFactoryTest.java | 85 ++++++++++++++++++++++
 2 files changed, 103 insertions(+), 5 deletions(-)

diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
index 37bf503118e..50e6243c906 100644
--- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
+++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java
@@ -33,6 +33,7 @@ import org.apache.druid.rpc.ServiceLocator;
 import org.apache.druid.rpc.StandardRetryPolicy;
 
 /**
+ *
  */
 public class RemoteTaskActionClientFactory implements TaskActionClientFactory
 {
@@ -50,11 +51,7 @@ public class RemoteTaskActionClientFactory implements 
TaskActionClientFactory
     this.overlordClient = clientFactory.makeClient(
         NodeRole.OVERLORD.toString(),
         serviceLocator,
-        StandardRetryPolicy.builder()
-            .maxAttempts(retryPolicyConfig.getMaxRetryCount() - 1)
-            
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
-            
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
-            .build()
+        buildRetryPolicy(retryPolicyConfig)
     );
     this.jsonMapper = jsonMapper;
   }
@@ -64,4 +61,20 @@ public class RemoteTaskActionClientFactory implements 
TaskActionClientFactory
   {
     return new RemoteTaskActionClient(task, overlordClient, jsonMapper);
   }
+
+  /**
+   * Converts a {@link RetryPolicyConfig} to a {@link StandardRetryPolicy}.
+   *
+   * @param retryPolicyConfig the retry policy configuration
+   *
+   * @return the standard retry policy
+   */
+  static StandardRetryPolicy buildRetryPolicy(final RetryPolicyConfig 
retryPolicyConfig)
+  {
+    return StandardRetryPolicy.builder()
+                              
.maxAttempts(retryPolicyConfig.getMaxRetryCount() + 1)
+                              
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
+                              
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
+                              .build();
+  }
 }
diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
new file mode 100644
index 00000000000..0c504c53687
--- /dev/null
+++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactoryTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.indexing.common.actions;
+
+import org.apache.druid.indexing.common.RetryPolicyConfig;
+import org.apache.druid.rpc.StandardRetryPolicy;
+import org.joda.time.Period;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RemoteTaskActionClientFactoryTest
+{
+  @Test
+  public void test_buildRetryPolicy_withDefaultConfig()
+  {
+    final RetryPolicyConfig config = new RetryPolicyConfig();
+    final StandardRetryPolicy retryPolicy = 
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+    // Default maxRetryCount is 13, so maxAttempts should be 14 (13 retries + 
1 initial attempt)
+    Assert.assertEquals(14, retryPolicy.maxAttempts());
+
+    // Default minWait is PT5S (5 seconds)
+    Assert.assertEquals(5000, retryPolicy.minWaitMillis());
+
+    // Default maxWait is PT1M (1 minute)
+    Assert.assertEquals(60000, retryPolicy.maxWaitMillis());
+  }
+
+  @Test
+  public void test_buildRetryPolicy_withCustomConfig()
+  {
+    final RetryPolicyConfig config = new RetryPolicyConfig()
+        .setMaxRetryCount(5)
+        .setMinWait(new Period("PT10S"))
+        .setMaxWait(new Period("PT2M"));
+
+    final StandardRetryPolicy retryPolicy = 
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+    // maxRetryCount is 5, so maxAttempts should be 6 (5 retries + 1 initial 
attempt)
+    Assert.assertEquals(6, retryPolicy.maxAttempts());
+
+    // minWait is PT10S (10 seconds)
+    Assert.assertEquals(10000, retryPolicy.minWaitMillis());
+
+    // maxWait is PT2M (2 minutes)
+    Assert.assertEquals(120000, retryPolicy.maxWaitMillis());
+  }
+
+  @Test
+  public void test_buildRetryPolicy_withZeroRetries()
+  {
+    final RetryPolicyConfig config = new RetryPolicyConfig()
+        .setMaxRetryCount(0)
+        .setMinWait(new Period("PT1S"))
+        .setMaxWait(new Period("PT30S"));
+
+    final StandardRetryPolicy retryPolicy = 
RemoteTaskActionClientFactory.buildRetryPolicy(config);
+
+    // maxRetryCount is 0, so maxAttempts should be 1 (0 retries + 1 initial 
attempt)
+    Assert.assertEquals(1, retryPolicy.maxAttempts());
+
+    // minWait is PT1S (1 second)
+    Assert.assertEquals(1000, retryPolicy.minWaitMillis());
+
+    // maxWait is PT30S (30 seconds)
+    Assert.assertEquals(30000, retryPolicy.maxWaitMillis());
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to