This is an automated email from the ASF dual-hosted git repository.
gus pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new e04dfd3a620 SOLR-17151 backport to 9x (#2859) - check cpu limits between components
e04dfd3a620 is described below
commit e04dfd3a6209515dcecc75f14a3c7dcb022f572c
Author: Gus Heck <[email protected]>
AuthorDate: Mon Nov 11 19:46:04 2024 -0500
SOLR-17151 backport to 9x (#2859) - check cpu limits between components
---
.../apache/solr/handler/RequestHandlerBase.java | 3 +
.../solr/handler/component/QueryComponent.java | 12 +-
.../solr/handler/component/ResponseBuilder.java | 12 +-
.../solr/handler/component/SearchHandler.java | 249 +++++++++++++++------
.../org/apache/solr/request/SolrRequestInfo.java | 10 +-
.../apache/solr/response/SolrQueryResponse.java | 5 +-
.../java/org/apache/solr/search/QueryLimits.java | 17 +-
.../solr/core/ExitableDirectoryReaderTest.java | 21 +-
...wedLimit.java => ComponentStageLimitsTest.java} | 212 +++++++-----------
.../apache/solr/search/TestCpuAllowedLimit.java | 14 +-
.../solr/client/solrj/impl/LBSolrClient.java | 7 +-
11 files changed, 337 insertions(+), 225 deletions(-)
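
The heart of this backport is a guard that runs between search components at every stage of
request processing, so a request that has exhausted its time or CPU budget exits early with
partial results instead of starting the next component. A minimal, self-contained sketch of that
control flow (hypothetical names throughout; the real implementation is
SearchHandler.checkLimitsBefore in the diff below):

    import java.util.List;

    public class LimitBetweenStagesSketch {
      interface Component {
        String name();
        void process(StringBuilder results);
      }

      // Returns false (partial results) when the budget trips between components.
      static boolean runAll(List<Component> components, long deadlineNanos, StringBuilder results) {
        for (Component c : components) {
          // Check BEFORE each component, not only inside long-running loops.
          if (System.nanoTime() > deadlineNanos) {
            results.append(" [partial: limit exceeded prior to ").append(c.name()).append("]");
            return false;
          }
          c.process(results);
        }
        return true;
      }
    }
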
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index b0c17ac687a..1d7ba554bc7 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -44,6 +44,7 @@ import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.QueryLimitsExceededException;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
@@ -231,6 +232,8 @@ public abstract class RequestHandlerBase
metrics.numTimeouts.mark();
rsp.setHttpCaching(false);
}
+ } catch (QueryLimitsExceededException e) {
+ rsp.setPartialResults(req);
} catch (Exception e) {
e = normalizeReceivedException(req, e);
processErrorMetricsOnException(e, metrics);
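
Note the ordering in the hunk above: the new catch for QueryLimitsExceededException must precede
the generic catch (Exception e) so that a tripped limit is reported as partial results rather
than surfacing as an error. A hedged sketch of that pattern (SimulatedLimitException and the
return values are illustrative stand-ins, not Solr API):

    public class CatchOrderSketch {
      static class SimulatedLimitException extends RuntimeException {}

      static String handle(Runnable request) {
        try {
          request.run();
          return "ok";
        } catch (SimulatedLimitException e) {
          return "partial"; // the specific case first: an exceeded limit is not an error
        } catch (Exception e) {
          return "error"; // everything else is a genuine failure
        }
      }
    }
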
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index 3acb78ed78f..2a2363dab3b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -615,17 +615,23 @@ public class QueryComponent extends SearchComponent {
}
protected int regularDistributedProcess(ResponseBuilder rb) {
- if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) return ResponseBuilder.STAGE_PARSE_QUERY;
+ if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) {
+ return ResponseBuilder.STAGE_PARSE_QUERY;
+ }
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedStats(rb);
return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
- if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
+ if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
+ return ResponseBuilder.STAGE_EXECUTE_QUERY;
+ }
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
createMainQuery(rb);
return ResponseBuilder.STAGE_GET_FIELDS;
}
- if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) return ResponseBuilder.STAGE_GET_FIELDS;
+ if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) {
+ return ResponseBuilder.STAGE_GET_FIELDS;
+ }
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS && !rb.onePassDistributedQuery) {
createRetrieveDocs(rb);
return ResponseBuilder.STAGE_DONE;
diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
index 80d0b726ff7..0216063d2f6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
@@ -123,13 +123,13 @@ public class ResponseBuilder {
* public static final String LOCAL_SHARD = "local"; public static final String DOC_QUERY = "dq";
* *
*/
- public static int STAGE_START = 0;
+ public static final int STAGE_START = 0;
- public static int STAGE_PARSE_QUERY = 1000;
- public static int STAGE_TOP_GROUPS = 1500;
- public static int STAGE_EXECUTE_QUERY = 2000;
- public static int STAGE_GET_FIELDS = 3000;
- public static int STAGE_DONE = Integer.MAX_VALUE;
+ public static final int STAGE_PARSE_QUERY = 1000;
+ public static final int STAGE_TOP_GROUPS = 1500;
+ public static final int STAGE_EXECUTE_QUERY = 2000;
+ public static final int STAGE_GET_FIELDS = 3000;
+ public static final int STAGE_DONE = Integer.MAX_VALUE;
public int stage; // What stage is this current request at?
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
index e70b286e9f3..585b6e9a5c2 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -18,8 +18,17 @@ package org.apache.solr.handler.component;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.common.params.CommonParams.PATH;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_DONE;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_EXECUTE_QUERY;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_GET_FIELDS;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_PARSE_QUERY;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_START;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_TOP_GROUPS;
+import static org.apache.solr.request.SolrRequestInfo.getQueryLimits;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
import com.codahale.metrics.Counter;
+import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
@@ -31,6 +40,8 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.search.TotalHits;
import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
@@ -412,24 +423,58 @@ public class SearchHandler extends RequestHandlerBase
return; // Circuit breaker tripped, return immediately
}
- // creates a ShardHandler object only if it's needed
- final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb);
+ processComponents(req, rsp, rb, timer, components);
- if (timer == null) {
- // non-debugging prepare phase
- for (SearchComponent c : components) {
- c.prepare(rb);
- }
- } else {
- // debugging prepare phase
- RTimerTree subt = timer.sub("prepare");
- for (SearchComponent c : components) {
- rb.setTimer(subt.sub(c.getName()));
- c.prepare(rb);
- rb.getTimer().stop();
+ // SOLR-5550: still provide shards.info if requested even for a short-circuited distrib request
+ if (!rb.isDistrib
+ && req.getParams().getBool(ShardParams.SHARDS_INFO, false)
+ && rb.shortCircuitedURL != null) {
+ NamedList<Object> shardInfo = new SimpleOrderedMap<>();
+ SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
+ if (rsp.getException() != null) {
+ Throwable cause = rsp.getException();
+ if (cause instanceof SolrServerException) {
+ cause = ((SolrServerException) cause).getRootCause();
+ } else {
+ if (cause.getCause() != null) {
+ cause = cause.getCause();
+ }
+ }
+ nl.add("error", cause.toString());
+ if (!core.getCoreContainer().hideStackTrace()) {
+ StringWriter trace = new StringWriter();
+ cause.printStackTrace(new PrintWriter(trace));
+ nl.add("trace", trace.toString());
+ }
+ } else if (rb.getResults() != null) {
+ nl.add("numFound", rb.getResults().docList.matches());
+ nl.add(
+ "numFoundExact",
+ rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
+ nl.add("maxScore", rb.getResults().docList.maxScore());
}
- subt.stop();
+ nl.add("shardAddress", rb.shortCircuitedURL);
+ nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this
request so far
+
+ int pos = rb.shortCircuitedURL.indexOf("://");
+ String shardInfoName =
+ pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
+ shardInfo.add(shardInfoName, nl);
+ rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
}
+ }
+
+ private void processComponents(
+ SolrQueryRequest req,
+ SolrQueryResponse rsp,
+ ResponseBuilder rb,
+ RTimerTree timer,
+ List<SearchComponent> components)
+ throws IOException {
+ // creates a ShardHandler object only if it's needed
+ final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb);
+
+ if (!prepareComponents(req, rb, timer, components)) return;
{ // Once all of our components have been prepared, check if this request involves a SortSpec.
// If it does, and if our request includes a cursorMark param, then parse & init the
@@ -446,19 +491,26 @@ public class SearchHandler extends RequestHandlerBase
if (!rb.isDistrib) {
// a normal non-distributed request
-
try {
// The semantics of debugging vs not debugging are different enough that
// it makes sense to have two control loops
if (!rb.isDebug()) {
// Process
for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "process", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
c.process(rb);
}
} else {
// Process
RTimerTree subt = timer.sub("process");
for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "process debug", rb.req, rb.rsp,
components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
rb.setTimer(subt.sub(c.getName()));
c.process(rb);
rb.getTimer().stop();
@@ -472,22 +524,7 @@ public class SearchHandler extends RequestHandlerBase
}
} catch (ExitableDirectoryReader.ExitingReaderException ex) {
log.warn("Query: {}; ", req.getParamString(), ex);
- if (rb.rsp.getResponse() == null) {
- rb.rsp.addResponse(new SolrDocumentList());
-
- // If a cursorMark was passed, and we didn't progress, set
- // the nextCursorMark to the same position
- String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
- if (null != cursorStr) {
- rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
- }
- }
- if (rb.isDebug()) {
- NamedList<Object> debug = new NamedList<>();
- debug.add("explain", new NamedList<>());
- rb.rsp.add("debug", debug);
- }
- rb.rsp.setPartialResults(rb.req);
+ shortCircuitedResults(req, rb);
}
} else {
// a distributed request
@@ -504,7 +541,10 @@ public class SearchHandler extends RequestHandlerBase
// call all components
for (SearchComponent c : components) {
- // the next stage is the minimum of what all components report
+ if (checkLimitsBefore(c, "distrib", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ } // the next stage is the minimum of what all components report
nextStage = Math.min(nextStage, c.distributedProcess(rb));
}
@@ -558,6 +598,8 @@ public class SearchHandler extends RequestHandlerBase
? shardHandler1.takeCompletedIncludingErrors()
: shardHandler1.takeCompletedOrError();
if (srsp == null) break; // no more requests to wait for
+ AtomicReference<Object> detailMesg =
+ new AtomicReference<>(); // or perhaps new Object[1] ?
boolean anyResponsesPartial =
srsp.getShardRequest().responses.stream()
@@ -568,9 +610,20 @@ public class SearchHandler extends RequestHandlerBase
return false;
}
Object recursive =
resp.findRecursive("responseHeader", "partialResults");
+ if (recursive != null) {
+ Object message =
+ "[Shard:"
+ + response.getShardAddress()
+ + "]"
+ resp.findRecursive(
+ "responseHeader",
+ RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ detailMesg.compareAndSet(null, message); // first one, ignore rest
+ }
return recursive != null;
});
if (anyResponsesPartial) {
+ rb.rsp.addPartialResponseDetail(detailMesg.get());
rsp.setPartialResults(rb.req);
}
// Was there an exception?
@@ -602,56 +655,122 @@ public class SearchHandler extends RequestHandlerBase
// let the components see the responses to the request
for (SearchComponent c : components) {
+ if (checkLimitsBefore(
+ c,
+ "handleResponses next stage:" + stageInEnglish(nextStage),
+ rb.req,
+ rb.rsp,
+ components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
c.handleResponses(rb, srsp.getShardRequest());
}
}
}
for (SearchComponent c : components) {
+ if (checkLimitsBefore(
+ c, "finishStage stage:" + stageInEnglish(nextStage), rb.req,
rb.rsp, components)) {
+ return;
+ }
c.finishStage(rb);
}
// we are done when the next stage is MAX_VALUE
} while (nextStage != Integer.MAX_VALUE);
}
+ }
- // SOLR-5550: still provide shards.info if requested even for a short circuited distrib request
- if (!rb.isDistrib
- && req.getParams().getBool(ShardParams.SHARDS_INFO, false)
- && rb.shortCircuitedURL != null) {
- NamedList<Object> shardInfo = new SimpleOrderedMap<>();
- SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
- if (rsp.getException() != null) {
- Throwable cause = rsp.getException();
- if (cause instanceof SolrServerException) {
- cause = ((SolrServerException) cause).getRootCause();
- } else {
- if (cause.getCause() != null) {
- cause = cause.getCause();
- }
+ private static boolean prepareComponents(
+ SolrQueryRequest req, ResponseBuilder rb, RTimerTree timer, List<SearchComponent> components)
+ throws IOException {
+ if (timer == null) {
+ // non-debugging prepare phase
+ for (SearchComponent component : components) {
+ if (checkLimitsBefore(component, "prepare", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return false;
}
- nl.add("error", cause.toString());
- if (!core.getCoreContainer().hideStackTrace()) {
- StringWriter trace = new StringWriter();
- cause.printStackTrace(new PrintWriter(trace));
- nl.add("trace", trace.toString());
+ component.prepare(rb);
+ }
+ } else {
+ // debugging prepare phase
+ RTimerTree subt = timer.sub("prepare");
+ for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "prepare debug", rb.req, rb.rsp, components))
{
+ shortCircuitedResults(req, rb);
+ return false;
}
- } else if (rb.getResults() != null) {
- nl.add("numFound", rb.getResults().docList.matches());
- nl.add(
- "numFoundExact",
- rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
- nl.add("maxScore", rb.getResults().docList.maxScore());
+ rb.setTimer(subt.sub(c.getName()));
+ c.prepare(rb);
+ rb.getTimer().stop();
}
- nl.add("shardAddress", rb.shortCircuitedURL);
- nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this
request so far
+ subt.stop();
+ }
+ return true;
+ }
- int pos = rb.shortCircuitedURL.indexOf("://");
- String shardInfoName =
- pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
- shardInfo.add(shardInfoName, nl);
- rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
+ private static String stageInEnglish(int nextStage) {
+ // This should probably be an enum, but that change should be its own ticket.
+ switch (nextStage) {
+ case STAGE_START:
+ return "START";
+ case STAGE_PARSE_QUERY:
+ return "PARSE_QUERY";
+ case STAGE_TOP_GROUPS:
+ return "TOP_GROUPS";
+ case STAGE_EXECUTE_QUERY:
+ return "EXECUTE_QUERY";
+ case STAGE_GET_FIELDS:
+ return "GET_FIELDS";
+ // nobody wants to think it was DONE and canceled after it completed...
+ case STAGE_DONE:
+ return "FINISHING";
+ default:
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "Unrecognized stage:" + nextStage);
+ }
+ }
+
+ private static void shortCircuitedResults(SolrQueryRequest req, ResponseBuilder rb) {
+
+ if (rb.rsp.getResponse() == null) {
+ rb.rsp.addResponse(new SolrDocumentList());
+
+ // If a cursorMark was passed, and we didn't progress, set
+ // the nextCursorMark to the same position
+ String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
+ if (null != cursorStr) {
+ rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
+ }
}
+ if (rb.isDebug()) {
+ NamedList<Object> debug = new NamedList<>();
+ debug.add("explain", new NamedList<>());
+ rb.rsp.add("debug", debug);
+ }
+ rb.rsp.setPartialResults(rb.req);
+ }
+
+ private static boolean checkLimitsBefore(
+ SearchComponent c,
+ String when,
+ SolrQueryRequest req,
+ SolrQueryResponse resp,
+ List<SearchComponent> components) {
+
+ return getQueryLimits(req, resp)
+ .maybeExitWithPartialResults(
+ () ->
+ "["
+ + when
+ + "] Limit(s) exceeded prior to "
+ + c.getName()
+ + " in "
+ + components.stream()
+ .map(SearchComponent::getName)
+ .collect(Collectors.toList()));
}
private static void throwSolrException(Throwable shardResponseException) throws SolrException {
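
A detail worth noticing in checkLimitsBefore above: the diagnostic message is passed as a lambda,
so the string concatenation and the stream over component names are only evaluated when a limit
has actually tripped. The same idea in isolation (a minimal sketch, names hypothetical):

    import java.util.function.Supplier;

    public class LazyMessageSketch {
      static boolean maybeExit(boolean limitTripped, Supplier<String> message) {
        if (limitTripped) {
          System.err.println(message.get()); // built only on the slow path
          return true;
        }
        return false; // fast path: the Supplier is never invoked
      }

      public static void main(String[] args) {
        maybeExit(false, () -> "expensive " + "concatenation " + "avoided");
      }
    }
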
diff --git a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
index 61b51a6af20..444867b430b 100644
--- a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
+++ b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
@@ -253,10 +253,12 @@ public class SolrRequestInfo {
*/
public QueryLimits getLimits() {
// make sure the ThreadCpuTime is always initialized
- return req == null || rsp == null
- ? QueryLimits.NONE
- : (QueryLimits)
- req.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(req, rsp));
+ return req == null || rsp == null ? QueryLimits.NONE : getQueryLimits(req, rsp);
+ }
+
+ public static QueryLimits getQueryLimits(SolrQueryRequest request, SolrQueryResponse response) {
+ return (QueryLimits)
+ request.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(request, response));
}
public SolrDispatchFilter.Action getAction() {
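
The extracted getQueryLimits helper memoizes a single QueryLimits instance in the request's
context map, so every component consulting the limits shares the same clocks. The computeIfAbsent
pattern reduced to its essentials (a sketch with a plain Map standing in for the request context):

    import java.util.HashMap;
    import java.util.Map;

    public class PerRequestMemoSketch {
      static final String LIMITS_KEY = "limits";

      static Object limitsFor(Map<String, Object> requestContext) {
        // Created at most once per request; later callers get the same instance.
        return requestContext.computeIfAbsent(LIMITS_KEY, k -> new Object());
      }

      public static void main(String[] args) {
        Map<String, Object> ctx = new HashMap<>();
        System.out.println(limitsFor(ctx) == limitsFor(ctx)); // true
      }
    }
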
diff --git a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
index b399ce57b79..5676a63fe10 100644
--- a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
+++ b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
@@ -207,7 +207,10 @@ public class SolrQueryResponse {
*/
public void addPartialResponseDetail(Object detail) {
NamedList<Object> header = getResponseHeader();
- if (header != null && detail != null) {
+ // never overwrite the original detail message. The first limit violation is the important one.
+ if (header != null
+ && detail != null
+ && header.get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) {
header.add(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY, detail);
}
}
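
The guard above makes addPartialResponseDetail first-writer-wins, on the theory that the first
limit violation is the root cause and later ones are fallout. A compact sketch of the same guard
(a plain Map stands in for the response header; the key name is illustrative):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class FirstDetailWinsSketch {
      static void addDetail(Map<String, Object> header, Object detail) {
        if (detail != null && header.get("partialResultsDetails") == null) {
          header.put("partialResultsDetails", detail); // never overwrite the original detail
        }
      }

      public static void main(String[] args) {
        Map<String, Object> header = new LinkedHashMap<>();
        addDetail(header, "cpu limit exceeded"); // kept
        addDetail(header, "time limit exceeded"); // ignored
        System.out.println(header); // {partialResultsDetails=cpu limit exceeded}
      }
    }
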
diff --git a/solr/core/src/java/org/apache/solr/search/QueryLimits.java b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
index 7aee5657678..e6e0db5eed9 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryLimits.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
@@ -16,12 +16,14 @@
*/
package org.apache.solr.search;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
import static org.apache.solr.search.CpuAllowedLimit.hasCpuLimit;
import static org.apache.solr.search.TimeAllowedLimit.hasTimeLimit;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
+import java.util.function.Supplier;
import org.apache.lucene.index.QueryTimeout;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@@ -113,7 +115,8 @@ public class QueryLimits implements QueryTimeout {
* @throws QueryLimitsExceededException if {@link #allowPartialResults} is false and limits have
* been reached.
*/
- public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
+ public boolean maybeExitWithPartialResults(Supplier<String> label)
+ throws QueryLimitsExceededException {
if (isLimitsEnabled() && shouldExit()) {
if (allowPartialResults) {
if (rsp != null) {
@@ -124,17 +127,25 @@ public class QueryLimits implements QueryTimeout {
"No request active, but attempting to exit with partial
results?");
}
rsp.setPartialResults(requestInfo.getReq());
- rsp.addPartialResponseDetail(formatExceptionMessage(label));
+ if (rsp.getResponseHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) {
+ // don't want to add duplicate keys. Although technically legal, there's a strong risk
+ // that clients won't anticipate it and break.
+ rsp.addPartialResponseDetail(formatExceptionMessage(label.get()));
+ }
}
return true;
} else {
- throw new QueryLimitsExceededException(formatExceptionMessage(label));
+ throw new QueryLimitsExceededException(formatExceptionMessage(label.get()));
}
} else {
return false;
}
}
+ public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
+ return maybeExitWithPartialResults(() -> label);
+ }
+
/**
* Method to diagnose limit exceeded. Note that while this should always list the exceeded limit,
* it may also nominate additional limits that have been exceeded since the actual check that
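
The old String-based maybeExitWithPartialResults survives as a one-line delegate to the new
Supplier overload, so existing callers compile unchanged while new call sites get lazy message
construction. The overload-delegation shape, stripped down (a sketch, not the Solr signatures):

    import java.util.function.Supplier;

    public class OverloadDelegationSketch {
      boolean maybeExit(Supplier<String> label) {
        return false; // the one real implementation lives in the lazy variant
      }

      boolean maybeExit(String label) {
        return maybeExit(() -> label); // eager callers funnel into it
      }
    }
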
diff --git a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
index 1ceb9636b20..f2e4aaa3c8f 100644
--- a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
+++ b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
@@ -32,7 +32,7 @@ import org.junit.Test;
*/
public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 {
- static int NUM_DOCS = 100;
+ static final int NUM_DOCS = 100;
static final String assertionString = "/response/numFound==" + NUM_DOCS;
static final String failureAssertionString = "/responseHeader/partialResults==true]";
static final String longTimeout = "10000";
@@ -70,9 +70,26 @@ public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 {
// this time we should get a query cache hit and hopefully no exception? this may change in the
// future if time checks are put into other places.
- assertJQ(req("q", q, "timeAllowed", "1", "sleep", sleep), assertionString);
+
+ // 2024-4-15: it did change..., and now this fails with 1 or 2 ms and passes with 3ms... I see
+ // no way this won't be terribly brittle. Maybe TestInjection of some sort to bring this back?
+
+ // assertJQ(req("q", q, "timeAllowed", "2", "sleep", sleep), assertionString);
+
+ // The idea that the request won't time out due to caching is a flawed test methodology.
+ // It relies on the test running quickly and not stalling. The above test should possibly
+ // be doing something along the lines of this (but we lack api for it)
+ //
+ // SolrCores solrCores = ExitableDirectoryReaderTest.h.getCoreContainer().solrCores;
+ // List<SolrCore> cores = solrCores.getCores();
+ // for (SolrCore core : cores) {
+ // if (<<< find the right core >>> ) {
+ // ((SolrCache)core.getSearcher().get().<<<check cache for a key like name:a* >>>
+ // }
+ // }
// now do the same for the filter cache
+ // 2024-4-15: this still passes probably because *:* is so fast, but it still worries me
assertJQ(req("q", "*:*", "fq", q, "timeAllowed", "1", "sleep", sleep), failureAssertionString);
// make sure that the result succeeds this time, and that a bad filter wasn't cached
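
The commented-out block above gestures at test API that does not exist yet. One hedged reading of
the "TestInjection of some sort" idea in those comments: make the time source injectable so a
test can advance it deterministically instead of racing the wall clock (everything below is
hypothetical, not existing Solr API):

    import java.util.function.LongSupplier;

    public class InjectableClockSketch {
      private final LongSupplier nanoTime; // System::nanoTime in production, a counter in tests
      private final long deadlineNanos;

      InjectableClockSketch(LongSupplier nanoTime, long budgetNanos) {
        this.nanoTime = nanoTime;
        this.deadlineNanos = nanoTime.getAsLong() + budgetNanos;
      }

      boolean shouldExit() {
        return nanoTime.getAsLong() > deadlineNanos;
      }

      public static void main(String[] args) {
        long[] fakeNanos = {0L};
        InjectableClockSketch limit = new InjectableClockSketch(() -> fakeNanos[0], 100);
        System.out.println(limit.shouldExit()); // false
        fakeNanos[0] = 101; // advance the fake clock deterministically
        System.out.println(limit.shouldExit()); // true
      }
    }
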
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
similarity index 55%
copy from solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
copy to solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
index 274994099f1..c3c5d72548e 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
+++ b/solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
@@ -16,11 +16,11 @@
*/
package org.apache.solr.search;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
+
import java.lang.invoke.MethodHandles;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
@@ -35,7 +35,7 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestCpuAllowedLimit extends SolrCloudTestCase {
+public class ComponentStageLimitsTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String COLLECTION = "test";
@@ -65,6 +65,9 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
@BeforeClass
public static void setupClass() throws Exception {
+ // Note: copied from TestCpuAllowedLimit.java, segments not likely important
+ // at the moment, but setup retained for now.
+
// Using NoMergePolicy and 100 commits we should get 100 segments (across all shards).
// At this point of writing MAX_SEGMENTS_PER_SLICE in lucene is 5, so we should be
// ensured that any multithreaded testing will create 20 executable tasks for the
@@ -91,42 +94,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
}
@Test
- public void testCompareToWallClock() throws Exception {
+ public void testLimitPrepare() throws Exception {
Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
- long limitMs = 100;
- CpuAllowedLimit cpuLimit = new CpuAllowedLimit(limitMs);
- int[] randoms = new int[100];
- long startNs = System.nanoTime();
- int wakeups = 0;
- while (!cpuLimit.shouldExit()) {
- Thread.sleep(1);
- // do some busywork
- for (int i = 0; i < randoms.length; i++) {
- randoms[i] = random().nextInt();
- }
- wakeups++;
- }
- long endNs = System.nanoTime();
- long wallTimeDeltaMs = TimeUnit.MILLISECONDS.convert(endNs - startNs, TimeUnit.NANOSECONDS);
- log.info(
- "CPU limit: {} ms, elapsed wall-clock: {} ms, wakeups: {}",
- limitMs,
- wallTimeDeltaMs,
- wakeups);
- assertTrue(
- "Elapsed wall-clock time expected much larger than 100ms but was " +
wallTimeDeltaMs,
- limitMs < wallTimeDeltaMs);
- }
-
- @Test
- public void testDistribLimit() throws Exception {
- Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
-
SolrClient solrClient = cluster.getSolrClient();
-
- // no limits set - should eventually complete
- log.info("--- No limits, full results ---");
long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms prepare phase - partial results ---");
QueryResponse rsp =
solrClient.query(
COLLECTION,
@@ -138,18 +110,28 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "prepare",
+ "timeAllowed",
+ "500"));
+ System.err.println("rsp=" + rsp.jsonStr());
assertEquals(rsp.getHeader().get("status"), 0);
- Number qtime = (Number) rsp.getHeader().get("QTime");
- assertTrue("QTime expected " + qtime + " >> " + sleepMs, qtime.longValue()
> sleepMs);
- assertNull("should not have partial results",
rsp.getHeader().get("partialResults"));
- TestInjection.measureCpu();
- // 25 ms per 5 segments ~175ms each shard
- TestInjection.cpuTimerDelayInjectedNS = new AtomicInteger(25_000_000);
- // timeAllowed set, should return partial results
- log.info("--- timeAllowed, partial results ---");
- rsp =
+ assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
+ String details = (String)
rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[prepare]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
+ }
+
+ @Test
+ public void testLimitProcess() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ String msg = "--- 500ms limit, 1000ms process phase - partial results ---";
+ log.info(msg);
+ System.out.println(msg);
+ int sleepMs = 2000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
@@ -160,16 +142,28 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "multiThreaded",
- "false",
+ "process",
"timeAllowed",
- "500"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertEquals("should have partial results", true,
rsp.getHeader().get("partialResults"));
+ "1000"));
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ System.out.println("details=" + details);
+ assertNotNull(details);
+ assertTrue(details.contains("[process]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
+ assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
+ }
- log.info("--- timeAllowed, partial results, multithreading ---");
- rsp =
+ @Test
+ public void testLimitFinish() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms finish phase - partial results ---");
+ sleepMs = 1000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
@@ -180,95 +174,47 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "multiThreaded",
- "true",
+ "finish",
"timeAllowed",
"500"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
-
- // cpuAllowed set with large value, should return full results
- log.info("--- cpuAllowed, full results ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "10000"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertNull("should have full results",
rsp.getHeader().get("partialResults"));
- // cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 1, partial results ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "multiThreaded",
- "false"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[finishStage stage:PARSE_QUERY]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
- log.info("--- cpuAllowed 1, partial results omitted ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "partialResults",
- "false",
- "multiThreaded",
- "false",
- "_",
- "foo"));
- String s = rsp.jsonStr();
- System.err.println("rsp=" + s);
- assertEquals("should have partial results", "omitted",
rsp.getHeader().get("partialResults"));
+ }
- // cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 2, partial results, multi-threaded ---");
- rsp =
+ @Test
+ public void testLimitDistrib() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms distrib phase - partial results ---");
+ sleepMs = 1000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
"q",
"id:*",
"sort",
- "id desc",
+ "id asc",
+ ExpensiveSearchComponent.SLEEP_MS_PARAM,
+ String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "multiThreaded",
- "true"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "distrib",
+ "timeAllowed",
+ "500"));
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[distrib]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
}
-
- @Test
- public void testDistribLimit2() throws Exception {
- // This looks silly, but it actually guards against:
- // https://issues.apache.org/jira/browse/SOLR-17203
- testDistribLimit();
- }
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
index 274994099f1..4611de10eca 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
+++ b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
@@ -169,6 +169,7 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
assertEquals("should have partial results", true,
rsp.getHeader().get("partialResults"));
log.info("--- timeAllowed, partial results, multithreading ---");
+
rsp =
solrClient.query(
COLLECTION,
@@ -201,11 +202,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"stages",
"prepare,process",
"cpuAllowed",
- "10000"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "100000"));
+ System.err.println("rsp=" + rsp.jsonStr());
assertNull("should have full results",
rsp.getHeader().get("partialResults"));
// cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 1, partial results ---");
+ log.info("--- cpuAllowed 100, partial results ---");
rsp =
solrClient.query(
COLLECTION,
@@ -222,7 +223,7 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"false"));
// System.err.println("rsp=" + rsp.jsonStr());
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
- log.info("--- cpuAllowed 1, partial results omitted ---");
+ log.info("--- cpuAllowed 100, partial results omitted ---");
rsp =
solrClient.query(
COLLECTION,
@@ -243,10 +244,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"foo"));
String s = rsp.jsonStr();
System.err.println("rsp=" + s);
- assertEquals("should have partial results", "omitted",
rsp.getHeader().get("partialResults"));
+ assertEquals(
+ "should not have partial results", "omitted",
rsp.getHeader().get("partialResults"));
// cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 2, partial results, multi-threaded ---");
+ log.info("--- cpuAllowed 100, partial results, multi-threaded ---");
rsp =
solrClient.query(
COLLECTION,
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
index 4305809f23d..1128c086460 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
@@ -293,9 +293,12 @@ public abstract class LBSolrClient extends SolrClient {
suffix = ":" + zombieServers.keySet();
}
// Skipping check time exceeded for the first request
- if (numServersTried > 0 && isTimeExceeded(timeAllowedNano, timeOutTime)) {
+ // Ugly string-based hack, but a "no live servers" message here is VERY misleading :(
+ if ((previousEx != null && previousEx.getMessage().contains("Limits exceeded!"))
+ || (numServersTried > 0 && isTimeExceeded(timeAllowedNano, timeOutTime))) {
throw new SolrServerException(
- "Time allowed to handle this request exceeded" + suffix,
previousEx);
+ "The processing limits for to this request were exceeded, see
cause for details",
+ previousEx);
}
if (serverStr == null) {
throw new SolrServerException(