This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new a70d09cff0b SOLR-17890 LeaderElectionTest flaky (#3957)
a70d09cff0b is described below
commit a70d09cff0b262f5cf790cb58f5df78d557fef88
Author: Jan Høydahl <[email protected]>
AuthorDate: Thu Dec 18 09:21:42 2025 +0100
SOLR-17890 LeaderElectionTest flaky (#3957)
---
.../org/apache/solr/cloud/LeaderElectionTest.java | 32 +++++++++++++++-------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
index 6c430f6e64b..c92645ed710 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
@@ -35,6 +35,7 @@ import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.RetryUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.KeeperException;
@@ -567,6 +568,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
Thread.sleep(4000);
+ // Stop stress threads first before any state changes
stopStress = true;
scheduleThread.interrupt();
@@ -579,23 +581,33 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
connLossThread.join();
killThread.join();
- int seq = threads.get(getLeaderThread()).getSeq();
+ // Retry getting leader with extended timeout to handle edge cases where
+ // getLeaderUrl() gets an unexpected exception and throws RuntimeException
+ RetryUtil.retryOnException(
+ Exception.class,
+ 60000, // 60 seconds total timeout
+ 100, // 100ms between retries
+ () -> {
+ int seq = threads.get(getLeaderThread()).getSeq();
+ log.info("Leader election stress test completed, leader seq: {}", seq);
+ });
- // we have a leader we know, TODO: lets check some other things
} finally {
// cleanup any threads still running
for (ClientThread thread : threads) {
- thread.close();
- }
-
- // cleanup any threads still running
- for (ClientThread thread : threads) {
- thread.es.zkClient.close();
- thread.close();
+ try {
+ thread.close();
+ } catch (Exception e) {
+ // ignore cleanup errors
+ }
}
for (Thread thread : threads) {
- thread.join();
+ try {
+ thread.join();
+ } catch (InterruptedException e) {
+ // ignore
+ }
}
}
}