This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_10_0
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10_0 by this push:
     new 9e2daa0ba41 SOLR-17890 LeaderElectionTest flaky (#3957)
9e2daa0ba41 is described below

commit 9e2daa0ba410e3a91e2cfee6f9c72419101efa59
Author: Jan Høydahl <[email protected]>
AuthorDate: Thu Dec 18 09:21:42 2025 +0100

    SOLR-17890 LeaderElectionTest flaky (#3957)
    
    (cherry picked from commit a70d09cff0b262f5cf790cb58f5df78d557fef88)
---
 .../org/apache/solr/cloud/LeaderElectionTest.java  | 32 +++++++++++++++-------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
index 6c430f6e64b..c92645ed710 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
@@ -35,6 +35,7 @@ import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.RetryUtil;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
 import org.apache.solr.common.util.Utils;
 import org.apache.zookeeper.KeeperException;
@@ -567,6 +568,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
 
       Thread.sleep(4000);
 
+      // Stop stress threads first before any state changes
       stopStress = true;
 
       scheduleThread.interrupt();
@@ -579,23 +581,33 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
       connLossThread.join();
       killThread.join();
 
-      int seq = threads.get(getLeaderThread()).getSeq();
+      // Retry getting leader with extended timeout to handle edge cases where
+      // getLeaderUrl() gets an unexpected exception and throws RuntimeException
+      RetryUtil.retryOnException(
+          Exception.class,
+          60000, // 60 seconds total timeout
+          100, // 100ms between retries
+          () -> {
+            int seq = threads.get(getLeaderThread()).getSeq();
+            log.info("Leader election stress test completed, leader seq: {}", seq);
+          });
 
-      // we have a leader we know, TODO: lets check some other things
     } finally {
       // cleanup any threads still running
       for (ClientThread thread : threads) {
-        thread.close();
-      }
-
-      // cleanup any threads still running
-      for (ClientThread thread : threads) {
-        thread.es.zkClient.close();
-        thread.close();
+        try {
+          thread.close();
+        } catch (Exception e) {
+          // ignore cleanup errors
+        }
       }
 
       for (Thread thread : threads) {
-        thread.join();
+        try {
+          thread.join();
+        } catch (InterruptedException e) {
+          // ignore
+        }
       }
     }
   }

Reply via email to