This is an automated email from the ASF dual-hosted git repository.

krisden pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 03694366d3f SOLR-16664: TestCoordinatorRole fails docs is null (#1363)
03694366d3f is described below

commit 03694366d3fbcb999799a6559223f13caef3824d
Author: Kevin Risden <[email protected]>
AuthorDate: Wed Feb 22 09:18:07 2023 -0500

    SOLR-16664: TestCoordinatorRole fails docs is null (#1363)
    
    * Add assertion with more details on failure
---
 .../apache/solr/search/TestCoordinatorRole.java    | 138 ++++++++++-----------
 1 file changed, 63 insertions(+), 75 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/search/TestCoordinatorRole.java 
b/solr/core/src/test/org/apache/solr/search/TestCoordinatorRole.java
index 8ea9781e98f..888a05c6376 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCoordinatorRole.java
+++ b/solr/core/src/test/org/apache/solr/search/TestCoordinatorRole.java
@@ -32,9 +32,10 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Consumer;
+import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -47,10 +48,9 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.SolrNamedThreadFactory;
-import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.NodeRoles;
 import org.apache.solr.embedded.JettySolrRunner;
 import org.apache.solr.servlet.CoordinatorHttpSolrCall;
@@ -60,8 +60,6 @@ import org.slf4j.LoggerFactory;
 public class TestCoordinatorRole extends SolrCloudTestCase {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static final long DEFAULT_TOLERANCE = 500;
-
   public void testSimple() throws Exception {
     MiniSolrCloudCluster cluster =
         configureCluster(4).addConfig("conf", 
configset("cloud-minimal")).configure();
@@ -84,7 +82,7 @@ public class TestCoordinatorRole extends SolrCloudTestCase {
       assertEquals(10, rsp.getResults().getNumFound());
 
       System.setProperty(NodeRoles.NODE_ROLES_PROP, "coordinator:on");
-      JettySolrRunner coordinatorJetty = null;
+      final JettySolrRunner coordinatorJetty;
       try {
         coordinatorJetty = cluster.startJettySolrRunner();
       } finally {
@@ -153,17 +151,17 @@ public class TestCoordinatorRole extends 
SolrCloudTestCase {
       }
       assertNotNull(nrtJetty);
       assertNotNull(pullJetty);
-      try (HttpSolrClient client = (HttpSolrClient) pullJetty.newClient()) {
+      try (SolrClient client = pullJetty.newClient()) {
         client.add(COLL, sid);
         client.commit(COLL);
         assertEquals(
             nrtCore,
             getHostCoreName(
-                COLL, qaJettyBase, client, p -> p.add("shards.preference", 
"replica.type:NRT")));
+                COLL, qaJettyBase, p -> p.add(ShardParams.SHARDS_PREFERENCE, 
"replica.type:NRT")));
         assertEquals(
             pullCore,
             getHostCoreName(
-                COLL, qaJettyBase, client, p -> p.add("shards.preference", 
"replica.type:PULL")));
+                COLL, qaJettyBase, p -> p.add(ShardParams.SHARDS_PREFERENCE, 
"replica.type:PULL")));
         // Now , kill NRT jetty
         JettySolrRunner nrtJettyF = nrtJetty;
         JettySolrRunner pullJettyF = pullJetty;
@@ -178,8 +176,7 @@ public class TestCoordinatorRole extends SolrCloudTestCase {
             executor.submit(
                 () -> {
                   // we manipulate the jetty instances in a separate thread to 
more closely mimic
-                  // the behavior we'd
-                  // see irl.
+                  // the behavior we'd see irl.
                   try {
                     Thread.sleep(establishBaselineMs);
                     log.info("stopping NRT jetty ...");
@@ -190,10 +187,8 @@ public class TestCoordinatorRole extends SolrCloudTestCase 
{
                     nrtJettyF.start(true);
                     log.info("NRT jetty restarted.");
                     // once NRT is back up, we expect PULL to continue serving 
until the TTL on ZK
-                    // state
-                    // used for query request routing has expired (60s). But 
here we force a return
-                    // to NRT
-                    // by stopping the PULL replica after a brief delay ...
+                    // state used for query request routing has expired (60s). 
But here we force a
+                    // return to NRT by stopping the PULL replica after a 
brief delay ...
                     Thread.sleep(pullServiceTimeMs);
                     log.info("stopping PULL jetty ...");
                     pullJettyF.stop();
@@ -211,8 +206,7 @@ public class TestCoordinatorRole extends SolrCloudTestCase {
                 getHostCoreName(
                     COLL,
                     qaJettyBase,
-                    client,
-                    p -> p.add("shards.preference", "replica.type:NRT")))) {
+                    p -> p.add(ShardParams.SHARDS_PREFERENCE, 
"replica.type:NRT")))) {
           count++;
           individualRequestStart = new Date().getTime();
         }
@@ -224,11 +218,9 @@ public class TestCoordinatorRole extends SolrCloudTestCase 
{
             establishBaselineMs,
             now - individualRequestStart);
         // default tolerance of 500ms below should suffice. Failover to PULL 
for this case should be
-        // very fast,
-        // because our QA-based client already knows both replicas are active, 
the index is stable,
-        // so the moment
-        // the client finds NRT is down it should be able to failover 
immediately and transparently
-        // to PULL.
+        // very fast, because our QA-based client already knows both replicas 
are active, the index
+        // is stable, so the moment the client finds NRT is down it should be 
able to failover
+        // immediately and transparently to PULL.
         assertEquals(
             "when we break out of the NRT query loop, should be b/c routed to 
PULL",
             pullCore,
@@ -261,14 +253,11 @@ public class TestCoordinatorRole extends 
SolrCloudTestCase {
             nrtDowntimeMs,
             count);
         // NRT replica is back up, registered as available with Zk, and 
availability info has been
-        // pulled down by
-        // our PULL-replica-based `client`, forwarded indexing command to NRT, 
index/commit
-        // completed. All of this
-        // accounts for the 3000ms tolerance allowed for below. This is not a 
strict value, and if
-        // it causes failures
-        // regularly we should feel free to increase the tolerance; but it's 
meant to provide a
-        // stable baseline from
-        // which to detect regressions.
+        // pulled down by our PULL-replica-based `client`, forwarded indexing 
command to NRT,
+        // index/commit completed. All of this accounts for the 3000ms 
tolerance allowed for below.
+        // This is not a strict value, and if it causes failures regularly we 
should feel free to
+        // increase the tolerance; but it's meant to provide a stable baseline 
from which to detect
+        // regressions.
         count = 0;
         start = new Date().getTime();
         individualRequestStart = start;
@@ -277,10 +266,9 @@ public class TestCoordinatorRole extends SolrCloudTestCase 
{
                 getHostCoreName(
                     COLL,
                     qaJettyBase,
-                    client,
                     p -> {
                       p.set(CommonParams.Q, "id:345");
-                      p.add("shards.preference", "replica.type:NRT");
+                      p.add(ShardParams.SHARDS_PREFERENCE, "replica.type:NRT");
                     }))) {
           count++;
           Thread.sleep(100);
@@ -296,7 +284,6 @@ public class TestCoordinatorRole extends SolrCloudTestCase {
         assertEquals(nrtCore, hostCore);
         // allow any exceptions to propagate
         jettyManipulationFuture.get();
-        if (true) return;
 
         // next phase: just toggle a bunch
         // TODO: could separate this out into a different test method, but 
this should suffice for
@@ -338,16 +325,16 @@ public class TestCoordinatorRole extends 
SolrCloudTestCase {
         start = new Date().getTime();
         try {
           do {
-            pullCore.equals(
-                hostCore =
-                    getHostCoreName(
-                        COLL,
-                        qaJettyBase,
-                        client,
-                        p -> {
-                          p.set(CommonParams.Q, "id:345");
-                          p.add("shards.preference", "replica.type:NRT");
-                        }));
+            if (pullCore.equals(
+                getHostCoreName(
+                    COLL,
+                    qaJettyBase,
+                    p -> {
+                      p.set(CommonParams.Q, "id:345");
+                      p.add(ShardParams.SHARDS_PREFERENCE, "replica.type:NRT");
+                    }))) {
+              done.set(true);
+            }
             count++;
             Thread.sleep(100);
           } while (!done.get());
@@ -380,44 +367,45 @@ public class TestCoordinatorRole extends 
SolrCloudTestCase {
     }
   }
 
-  @SuppressWarnings("rawtypes")
-  private String getHostCoreName(
-      String COLL, String qaNode, HttpSolrClient solrClient, 
Consumer<SolrQuery> p)
+  private String getHostCoreName(String COLL, String qaNode, 
Consumer<SolrQuery> p)
       throws Exception {
-
     boolean found = false;
-    SolrQuery q = new SolrQuery("*:*");
-    q.add("fl", "id,desc_s,_core_:[core]").add(OMIT_HEADER, TRUE);
+    SolrQuery q =
+        new SolrQuery(
+            CommonParams.Q,
+            "*:*",
+            CommonParams.FL,
+            "id,desc_s,_core_:[core]",
+            OMIT_HEADER,
+            TRUE,
+            CommonParams.WT,
+            CommonParams.JAVABIN);
     p.accept(q);
-    StringBuilder sb =
-        new 
StringBuilder(qaNode).append("/").append(COLL).append("/select?wt=javabin");
-    q.forEach(e -> 
sb.append("&").append(e.getKey()).append("=").append(e.getValue()[0]));
     SolrDocumentList docs = null;
-    for (int i = 0; i < 100; i++) {
-      try {
-        SimpleOrderedMap rsp =
-            (SimpleOrderedMap)
-                Utils.executeGET(solrClient.getHttpClient(), sb.toString(), 
Utils.JAVABINCONSUMER);
-        docs = (SolrDocumentList) rsp.get("response");
-        if (docs.size() > 0) {
-          found = true;
-          break;
-        }
-      } catch (SolrException ex) {
-        // we know we're doing tricky things that might cause transient errors
-        // TODO: all these query requests go to the QA node -- should QA 
propagate internal request
-        // errors
-        //  to the external client (and the external client retry?) or should 
QA attempt to failover
-        // transparently
-        //  in the event of an error?
-        if (i < 5) {
-          log.info("swallowing transient error", ex);
-        } else {
-          log.error("only expect actual _errors_ within a small window (e.g. 
500ms)", ex);
-          fail("initial error time threshold exceeded");
+    try (SolrClient solrClient = new Http2SolrClient.Builder(qaNode).build()) {
+      for (int i = 0; i < 100; i++) {
+        try {
+          QueryResponse queryResponse = solrClient.query(COLL, q);
+          docs = queryResponse.getResults();
+          assertNotNull("Docs should not be null. Query response was: " + 
queryResponse, docs);
+          if (docs.size() > 0) {
+            found = true;
+            break;
+          }
+        } catch (SolrException ex) {
+          // we know we're doing tricky things that might cause transient 
errors
+          // TODO: all these query requests go to the QA node -- should QA 
propagate internal
+          // request errors to the external client (and the external client 
retry?) or should QA
+          // attempt to failover transparently in the event of an error?
+          if (i < 5) {
+            log.info("swallowing transient error", ex);
+          } else {
+            log.error("only expect actual _errors_ within a small window (e.g. 
500ms)", ex);
+            fail("initial error time threshold exceeded");
+          }
         }
+        Thread.sleep(100);
       }
-      Thread.sleep(100);
     }
     assertTrue(found);
     return (String) docs.get(0).getFieldValue("_core_");

Reply via email to