This is an automated email from the ASF dual-hosted git repository.
gus pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new e04dfd3a620 SOLR-17151 backport to 9x (#2859) - check cpu limits between components
e04dfd3a620 is described below
commit e04dfd3a6209515dcecc75f14a3c7dcb022f572c
Author: Gus Heck <[email protected]>
AuthorDate: Mon Nov 11 19:46:04 2024 -0500
SOLR-17151 backport to 9x (#2859) - check cpu limits between components
---
.../apache/solr/handler/RequestHandlerBase.java | 3 +
.../solr/handler/component/QueryComponent.java | 12 +-
.../solr/handler/component/ResponseBuilder.java | 12 +-
.../solr/handler/component/SearchHandler.java | 249 +++++++++++++++------
.../org/apache/solr/request/SolrRequestInfo.java | 10 +-
.../apache/solr/response/SolrQueryResponse.java | 5 +-
.../java/org/apache/solr/search/QueryLimits.java | 17 +-
.../solr/core/ExitableDirectoryReaderTest.java | 21 +-
...wedLimit.java => ComponentStageLimitsTest.java} | 212 +++++++-----------
.../apache/solr/search/TestCpuAllowedLimit.java | 14 +-
.../solr/client/solrj/impl/LBSolrClient.java | 7 +-
11 files changed, 337 insertions(+), 225 deletions(-)
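
The heart of this backport is a guard that runs between search components at every stage of
request processing, so a request that has exhausted its time or CPU budget exits early with
partial results instead of starting the next component. A minimal, self-contained sketch of that
control flow (hypothetical names throughout; the real implementation is
SearchHandler.checkLimitsBefore in the diff below):

    import java.util.List;

    public class LimitBetweenStagesSketch {
      interface Component {
        String name();
        void process(StringBuilder results);
      }

      // Returns false (partial results) when the budget trips between components.
      static boolean runAll(List<Component> components, long deadlineNanos, StringBuilder results) {
        for (Component c : components) {
          // Check BEFORE each component, not only inside long-running loops.
          if (System.nanoTime() > deadlineNanos) {
            results.append(" [partial: limit exceeded prior to ").append(c.name()).append("]");
            return false;
          }
          c.process(results);
        }
        return true;
      }
    }
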
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index b0c17ac687a..1d7ba554bc7 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -44,6 +44,7 @@ import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.QueryLimitsExceededException;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
@@ -231,6 +232,8 @@ public abstract class RequestHandlerBase
metrics.numTimeouts.mark();
rsp.setHttpCaching(false);
}
+ } catch (QueryLimitsExceededException e) {
+ rsp.setPartialResults(req);
} catch (Exception e) {
e = normalizeReceivedException(req, e);
processErrorMetricsOnException(e, metrics);
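
Note the ordering in the hunk above: the new catch for QueryLimitsExceededException must precede
the generic catch (Exception e) so that a tripped limit is reported as partial results rather
than surfacing as an error. A hedged sketch of that pattern (SimulatedLimitException and the
return values are illustrative stand-ins, not Solr API):

    public class CatchOrderSketch {
      static class SimulatedLimitException extends RuntimeException {}

      static String handle(Runnable request) {
        try {
          request.run();
          return "ok";
        } catch (SimulatedLimitException e) {
          return "partial"; // the specific case first: an exceeded limit is not an error
        } catch (Exception e) {
          return "error"; // everything else is a genuine failure
        }
      }
    }
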
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index 3acb78ed78f..2a2363dab3b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -615,17 +615,23 @@ public class QueryComponent extends SearchComponent {
}
protected int regularDistributedProcess(ResponseBuilder rb) {
- if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) return ResponseBuilder.STAGE_PARSE_QUERY;
+ if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY) {
+ return ResponseBuilder.STAGE_PARSE_QUERY;
+ }
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
createDistributedStats(rb);
return ResponseBuilder.STAGE_EXECUTE_QUERY;
}
- if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
+ if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
+ return ResponseBuilder.STAGE_EXECUTE_QUERY;
+ }
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
createMainQuery(rb);
return ResponseBuilder.STAGE_GET_FIELDS;
}
- if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) return ResponseBuilder.STAGE_GET_FIELDS;
+ if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) {
+ return ResponseBuilder.STAGE_GET_FIELDS;
+ }
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS && !rb.onePassDistributedQuery) {
createRetrieveDocs(rb);
return ResponseBuilder.STAGE_DONE;
diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
index 80d0b726ff7..0216063d2f6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
@@ -123,13 +123,13 @@ public class ResponseBuilder {
* public static final String LOCAL_SHARD = "local"; public static final String DOC_QUERY = "dq";
* *
*/
- public static int STAGE_START = 0;
+ public static final int STAGE_START = 0;
- public static int STAGE_PARSE_QUERY = 1000;
- public static int STAGE_TOP_GROUPS = 1500;
- public static int STAGE_EXECUTE_QUERY = 2000;
- public static int STAGE_GET_FIELDS = 3000;
- public static int STAGE_DONE = Integer.MAX_VALUE;
+ public static final int STAGE_PARSE_QUERY = 1000;
+ public static final int STAGE_TOP_GROUPS = 1500;
+ public static final int STAGE_EXECUTE_QUERY = 2000;
+ public static final int STAGE_GET_FIELDS = 3000;
+ public static final int STAGE_DONE = Integer.MAX_VALUE;
public int stage; // What stage is this current request at?
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
index e70b286e9f3..585b6e9a5c2 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -18,8 +18,17 @@ package org.apache.solr.handler.component;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.common.params.CommonParams.PATH;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_DONE;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_EXECUTE_QUERY;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_GET_FIELDS;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_PARSE_QUERY;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_START;
+import static org.apache.solr.handler.component.ResponseBuilder.STAGE_TOP_GROUPS;
+import static org.apache.solr.request.SolrRequestInfo.getQueryLimits;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
import com.codahale.metrics.Counter;
+import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
@@ -31,6 +40,8 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.search.TotalHits;
import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
@@ -412,24 +423,58 @@ public class SearchHandler extends RequestHandlerBase
return; // Circuit breaker tripped, return immediately
}
- // creates a ShardHandler object only if it's needed
- final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb);
+ processComponents(req, rsp, rb, timer, components);
- if (timer == null) {
- // non-debugging prepare phase
- for (SearchComponent c : components) {
- c.prepare(rb);
- }
- } else {
- // debugging prepare phase
- RTimerTree subt = timer.sub("prepare");
- for (SearchComponent c : components) {
- rb.setTimer(subt.sub(c.getName()));
- c.prepare(rb);
- rb.getTimer().stop();
+ // SOLR-5550: still provide shards.info if requested even for a short-circuited distrib request
+ if (!rb.isDistrib
+ && req.getParams().getBool(ShardParams.SHARDS_INFO, false)
+ && rb.shortCircuitedURL != null) {
+ NamedList<Object> shardInfo = new SimpleOrderedMap<>();
+ SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
+ if (rsp.getException() != null) {
+ Throwable cause = rsp.getException();
+ if (cause instanceof SolrServerException) {
+ cause = ((SolrServerException) cause).getRootCause();
+ } else {
+ if (cause.getCause() != null) {
+ cause = cause.getCause();
+ }
+ }
+ nl.add("error", cause.toString());
+ if (!core.getCoreContainer().hideStackTrace()) {
+ StringWriter trace = new StringWriter();
+ cause.printStackTrace(new PrintWriter(trace));
+ nl.add("trace", trace.toString());
+ }
+ } else if (rb.getResults() != null) {
+ nl.add("numFound", rb.getResults().docList.matches());
+ nl.add(
+ "numFoundExact",
+ rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
+ nl.add("maxScore", rb.getResults().docList.maxScore());
}
- subt.stop();
+ nl.add("shardAddress", rb.shortCircuitedURL);
+ nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this
request so far
+
+ int pos = rb.shortCircuitedURL.indexOf("://");
+ String shardInfoName =
+ pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
+ shardInfo.add(shardInfoName, nl);
+ rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
}
+ }
+
+ private void processComponents(
+ SolrQueryRequest req,
+ SolrQueryResponse rsp,
+ ResponseBuilder rb,
+ RTimerTree timer,
+ List<SearchComponent> components)
+ throws IOException {
+ // creates a ShardHandler object only if it's needed
+ final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb);
+
+ if (!prepareComponents(req, rb, timer, components)) return;
{ // Once all of our components have been prepared, check if this request involves a SortSpec.
// If it does, and if our request includes a cursorMark param, then parse & init the
@@ -446,19 +491,26 @@ public class SearchHandler extends RequestHandlerBase
if (!rb.isDistrib) {
// a normal non-distributed request
-
try {
// The semantics of debugging vs not debugging are different enough that
// it makes sense to have two control loops
if (!rb.isDebug()) {
// Process
for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "process", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
c.process(rb);
}
} else {
// Process
RTimerTree subt = timer.sub("process");
for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "process debug", rb.req, rb.rsp,
components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
rb.setTimer(subt.sub(c.getName()));
c.process(rb);
rb.getTimer().stop();
@@ -472,22 +524,7 @@ public class SearchHandler extends RequestHandlerBase
}
} catch (ExitableDirectoryReader.ExitingReaderException ex) {
log.warn("Query: {}; ", req.getParamString(), ex);
- if (rb.rsp.getResponse() == null) {
- rb.rsp.addResponse(new SolrDocumentList());
-
- // If a cursorMark was passed, and we didn't progress, set
- // the nextCursorMark to the same position
- String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
- if (null != cursorStr) {
- rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
- }
- }
- if (rb.isDebug()) {
- NamedList<Object> debug = new NamedList<>();
- debug.add("explain", new NamedList<>());
- rb.rsp.add("debug", debug);
- }
- rb.rsp.setPartialResults(rb.req);
+ shortCircuitedResults(req, rb);
}
} else {
// a distributed request
@@ -504,7 +541,10 @@ public class SearchHandler extends RequestHandlerBase
// call all components
for (SearchComponent c : components) {
- // the next stage is the minimum of what all components report
+ if (checkLimitsBefore(c, "distrib", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ } // the next stage is the minimum of what all components report
nextStage = Math.min(nextStage, c.distributedProcess(rb));
}
@@ -558,6 +598,8 @@ public class SearchHandler extends RequestHandlerBase
? shardHandler1.takeCompletedIncludingErrors()
: shardHandler1.takeCompletedOrError();
if (srsp == null) break; // no more requests to wait for
+ AtomicReference<Object> detailMesg =
+ new AtomicReference<>(); // or perhaps new Object[1] ?
boolean anyResponsesPartial =
srsp.getShardRequest().responses.stream()
@@ -568,9 +610,20 @@ public class SearchHandler extends RequestHandlerBase
return false;
}
Object recursive =
resp.findRecursive("responseHeader", "partialResults");
+ if (recursive != null) {
+ Object message =
+ "[Shard:"
+ + response.getShardAddress()
+ + "]"
+ resp.findRecursive(
+ "responseHeader",
+ RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ detailMesg.compareAndSet(null, message); // first one, ignore rest
+ }
return recursive != null;
});
if (anyResponsesPartial) {
+ rb.rsp.addPartialResponseDetail(detailMesg.get());
rsp.setPartialResults(rb.req);
}
// Was there an exception?
@@ -602,56 +655,122 @@ public class SearchHandler extends RequestHandlerBase
// let the components see the responses to the request
for (SearchComponent c : components) {
+ if (checkLimitsBefore(
+ c,
+ "handleResponses next stage:" + stageInEnglish(nextStage),
+ rb.req,
+ rb.rsp,
+ components)) {
+ shortCircuitedResults(req, rb);
+ return;
+ }
c.handleResponses(rb, srsp.getShardRequest());
}
}
}
for (SearchComponent c : components) {
+ if (checkLimitsBefore(
+ c, "finishStage stage:" + stageInEnglish(nextStage), rb.req,
rb.rsp, components)) {
+ return;
+ }
c.finishStage(rb);
}
// we are done when the next stage is MAX_VALUE
} while (nextStage != Integer.MAX_VALUE);
}
+ }
- // SOLR-5550: still provide shards.info if requested even for a short circuited distrib request
- if (!rb.isDistrib
- && req.getParams().getBool(ShardParams.SHARDS_INFO, false)
- && rb.shortCircuitedURL != null) {
- NamedList<Object> shardInfo = new SimpleOrderedMap<>();
- SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
- if (rsp.getException() != null) {
- Throwable cause = rsp.getException();
- if (cause instanceof SolrServerException) {
- cause = ((SolrServerException) cause).getRootCause();
- } else {
- if (cause.getCause() != null) {
- cause = cause.getCause();
- }
+ private static boolean prepareComponents(
+ SolrQueryRequest req, ResponseBuilder rb, RTimerTree timer, List<SearchComponent> components)
+ throws IOException {
+ if (timer == null) {
+ // non-debugging prepare phase
+ for (SearchComponent component : components) {
+ if (checkLimitsBefore(component, "prepare", rb.req, rb.rsp, components)) {
+ shortCircuitedResults(req, rb);
+ return false;
}
- nl.add("error", cause.toString());
- if (!core.getCoreContainer().hideStackTrace()) {
- StringWriter trace = new StringWriter();
- cause.printStackTrace(new PrintWriter(trace));
- nl.add("trace", trace.toString());
+ component.prepare(rb);
+ }
+ } else {
+ // debugging prepare phase
+ RTimerTree subt = timer.sub("prepare");
+ for (SearchComponent c : components) {
+ if (checkLimitsBefore(c, "prepare debug", rb.req, rb.rsp, components))
{
+ shortCircuitedResults(req, rb);
+ return false;
}
- } else if (rb.getResults() != null) {
- nl.add("numFound", rb.getResults().docList.matches());
- nl.add(
- "numFoundExact",
- rb.getResults().docList.hitCountRelation() == TotalHits.Relation.EQUAL_TO);
- nl.add("maxScore", rb.getResults().docList.maxScore());
+ rb.setTimer(subt.sub(c.getName()));
+ c.prepare(rb);
+ rb.getTimer().stop();
}
- nl.add("shardAddress", rb.shortCircuitedURL);
- nl.add("time", req.getRequestTimer().getTime()); // elapsed time of this
request so far
+ subt.stop();
+ }
+ return true;
+ }
- int pos = rb.shortCircuitedURL.indexOf("://");
- String shardInfoName =
- pos != -1 ? rb.shortCircuitedURL.substring(pos + 3) : rb.shortCircuitedURL;
- shardInfo.add(shardInfoName, nl);
- rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
+ private static String stageInEnglish(int nextStage) {
+ // This should probably be an enum, but that change should be its own ticket.
+ switch (nextStage) {
+ case STAGE_START:
+ return "START";
+ case STAGE_PARSE_QUERY:
+ return "PARSE_QUERY";
+ case STAGE_TOP_GROUPS:
+ return "TOP_GROUPS";
+ case STAGE_EXECUTE_QUERY:
+ return "EXECUTE_QUERY";
+ case STAGE_GET_FIELDS:
+ return "GET_FIELDS";
+ // nobody wants to think it was DONE and canceled after it completed...
+ case STAGE_DONE:
+ return "FINISHING";
+ default:
+ throw new SolrException(
+ SolrException.ErrorCode.SERVER_ERROR, "Unrecognized stage:" + nextStage);
+ }
+ }
+
+ private static void shortCircuitedResults(SolrQueryRequest req, ResponseBuilder rb) {
+
+ if (rb.rsp.getResponse() == null) {
+ rb.rsp.addResponse(new SolrDocumentList());
+
+ // If a cursorMark was passed, and we didn't progress, set
+ // the nextCursorMark to the same position
+ String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
+ if (null != cursorStr) {
+ rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
+ }
}
+ if (rb.isDebug()) {
+ NamedList<Object> debug = new NamedList<>();
+ debug.add("explain", new NamedList<>());
+ rb.rsp.add("debug", debug);
+ }
+ rb.rsp.setPartialResults(rb.req);
+ }
+
+ private static boolean checkLimitsBefore(
+ SearchComponent c,
+ String when,
+ SolrQueryRequest req,
+ SolrQueryResponse resp,
+ List<SearchComponent> components) {
+
+ return getQueryLimits(req, resp)
+ .maybeExitWithPartialResults(
+ () ->
+ "["
+ + when
+ + "] Limit(s) exceeded prior to "
+ + c.getName()
+ + " in "
+ + components.stream()
+ .map(SearchComponent::getName)
+ .collect(Collectors.toList()));
}
private static void throwSolrException(Throwable shardResponseException) throws SolrException {
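
A detail worth noticing in checkLimitsBefore above: the diagnostic message is passed as a lambda,
so the string concatenation and the stream over component names are only evaluated when a limit
has actually tripped. The same idea in isolation (a minimal sketch, names hypothetical):

    import java.util.function.Supplier;

    public class LazyMessageSketch {
      static boolean maybeExit(boolean limitTripped, Supplier<String> message) {
        if (limitTripped) {
          System.err.println(message.get()); // built only on the slow path
          return true;
        }
        return false; // fast path: the Supplier is never invoked
      }

      public static void main(String[] args) {
        maybeExit(false, () -> "expensive " + "concatenation " + "avoided");
      }
    }
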
diff --git a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
index 61b51a6af20..444867b430b 100644
--- a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
+++ b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
@@ -253,10 +253,12 @@ public class SolrRequestInfo {
*/
public QueryLimits getLimits() {
// make sure the ThreadCpuTime is always initialized
- return req == null || rsp == null
- ? QueryLimits.NONE
- : (QueryLimits)
- req.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(req, rsp));
+ return req == null || rsp == null ? QueryLimits.NONE : getQueryLimits(req, rsp);
+ }
+
+ public static QueryLimits getQueryLimits(SolrQueryRequest request, SolrQueryResponse response) {
+ return (QueryLimits)
+ request.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(request, response));
}
public SolrDispatchFilter.Action getAction() {
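
The extracted getQueryLimits helper memoizes a single QueryLimits instance in the request's
context map, so every component consulting the limits shares the same clocks. The computeIfAbsent
pattern reduced to its essentials (a sketch with a plain Map standing in for the request context):

    import java.util.HashMap;
    import java.util.Map;

    public class PerRequestMemoSketch {
      static final String LIMITS_KEY = "limits";

      static Object limitsFor(Map<String, Object> requestContext) {
        // Created at most once per request; later callers get the same instance.
        return requestContext.computeIfAbsent(LIMITS_KEY, k -> new Object());
      }

      public static void main(String[] args) {
        Map<String, Object> ctx = new HashMap<>();
        System.out.println(limitsFor(ctx) == limitsFor(ctx)); // true
      }
    }
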
diff --git a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
index b399ce57b79..5676a63fe10 100644
--- a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
+++ b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
@@ -207,7 +207,10 @@ public class SolrQueryResponse {
*/
public void addPartialResponseDetail(Object detail) {
NamedList<Object> header = getResponseHeader();
- if (header != null && detail != null) {
+ // never overwrite the original detail message. The first limit violation is the important one.
+ if (header != null
+ && detail != null
+ && header.get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) {
header.add(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY, detail);
}
}
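
The guard above makes addPartialResponseDetail first-writer-wins, on the theory that the first
limit violation is the root cause and later ones are fallout. A compact sketch of the same guard
(a plain Map stands in for the response header; the key name is illustrative):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class FirstDetailWinsSketch {
      static void addDetail(Map<String, Object> header, Object detail) {
        if (detail != null && header.get("partialResultsDetails") == null) {
          header.put("partialResultsDetails", detail); // never overwrite the original detail
        }
      }

      public static void main(String[] args) {
        Map<String, Object> header = new LinkedHashMap<>();
        addDetail(header, "cpu limit exceeded"); // kept
        addDetail(header, "time limit exceeded"); // ignored
        System.out.println(header); // {partialResultsDetails=cpu limit exceeded}
      }
    }
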
diff --git a/solr/core/src/java/org/apache/solr/search/QueryLimits.java b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
index 7aee5657678..e6e0db5eed9 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryLimits.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
@@ -16,12 +16,14 @@
*/
package org.apache.solr.search;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
import static org.apache.solr.search.CpuAllowedLimit.hasCpuLimit;
import static org.apache.solr.search.TimeAllowedLimit.hasTimeLimit;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
+import java.util.function.Supplier;
import org.apache.lucene.index.QueryTimeout;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@@ -113,7 +115,8 @@ public class QueryLimits implements QueryTimeout {
* @throws QueryLimitsExceededException if {@link #allowPartialResults} is false and limits have
* been reached.
*/
- public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
+ public boolean maybeExitWithPartialResults(Supplier<String> label)
+ throws QueryLimitsExceededException {
if (isLimitsEnabled() && shouldExit()) {
if (allowPartialResults) {
if (rsp != null) {
@@ -124,17 +127,25 @@ public class QueryLimits implements QueryTimeout {
"No request active, but attempting to exit with partial
results?");
}
rsp.setPartialResults(requestInfo.getReq());
- rsp.addPartialResponseDetail(formatExceptionMessage(label));
+ if (rsp.getResponseHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY) == null) {
+ // don't want to add duplicate keys. Although technically legal, there's a strong risk
+ // that clients won't anticipate it and break.
+ rsp.addPartialResponseDetail(formatExceptionMessage(label.get()));
+ }
}
return true;
} else {
- throw new QueryLimitsExceededException(formatExceptionMessage(label));
+ throw new QueryLimitsExceededException(formatExceptionMessage(label.get()));
}
} else {
return false;
}
}
+ public boolean maybeExitWithPartialResults(String label) throws QueryLimitsExceededException {
+ return maybeExitWithPartialResults(() -> label);
+ }
+
/**
* Method to diagnose limit exceeded. Note that while this should always list the exceeded limit,
* it may also nominate additional limits that have been exceeded since the actual check that
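
The old String-based maybeExitWithPartialResults survives as a one-line delegate to the new
Supplier overload, so existing callers compile unchanged while new call sites get lazy message
construction. The overload-delegation shape, stripped down (a sketch, not the Solr signatures):

    import java.util.function.Supplier;

    public class OverloadDelegationSketch {
      boolean maybeExit(Supplier<String> label) {
        return false; // the one real implementation lives in the lazy variant
      }

      boolean maybeExit(String label) {
        return maybeExit(() -> label); // eager callers funnel into it
      }
    }
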
diff --git a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
index 1ceb9636b20..f2e4aaa3c8f 100644
--- a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
+++ b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
@@ -32,7 +32,7 @@ import org.junit.Test;
*/
public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 {
- static int NUM_DOCS = 100;
+ static final int NUM_DOCS = 100;
static final String assertionString = "/response/numFound==" + NUM_DOCS;
static final String failureAssertionString = "/responseHeader/partialResults==true]";
static final String longTimeout = "10000";
@@ -70,9 +70,26 @@ public class ExitableDirectoryReaderTest extends SolrTestCaseJ4 {
// this time we should get a query cache hit and hopefully no exception? this may change in the
// future if time checks are put into other places.
- assertJQ(req("q", q, "timeAllowed", "1", "sleep", sleep), assertionString);
+
+ // 2024-4-15: it did change..., and now this fails with 1 or 2 ms and passes with 3ms... I see
+ // no way this won't be terribly brittle. Maybe TestInjection of some sort to bring this back?
+
+ // assertJQ(req("q", q, "timeAllowed", "2", "sleep", sleep), assertionString);
+
+ // The idea that the request won't time out due to caching is a flawed test methodology.
+ // It relies on the test running quickly and not stalling. The above test should possibly
+ // be doing something along the lines of this (but we lack api for it)
+ //
+ // SolrCores solrCores = ExitableDirectoryReaderTest.h.getCoreContainer().solrCores;
+ // List<SolrCore> cores = solrCores.getCores();
+ // for (SolrCore core : cores) {
+ // if (<<< find the right core >>> ) {
+ // ((SolrCache)core.getSearcher().get().<<<check cache for a key like name:a* >>>
+ // }
+ // }
// now do the same for the filter cache
+ // 2024-4-15: this still passes probably because *:* is so fast, but it still worries me
assertJQ(req("q", "*:*", "fq", q, "timeAllowed", "1", "sleep", sleep), failureAssertionString);
// make sure that the result succeeds this time, and that a bad filter wasn't cached
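
The commented-out block above gestures at test API that does not exist yet. One hedged reading of
the "TestInjection of some sort" idea in those comments: make the time source injectable so a
test can advance it deterministically instead of racing the wall clock (everything below is
hypothetical, not existing Solr API):

    import java.util.function.LongSupplier;

    public class InjectableClockSketch {
      private final LongSupplier nanoTime; // System::nanoTime in production, a counter in tests
      private final long deadlineNanos;

      InjectableClockSketch(LongSupplier nanoTime, long budgetNanos) {
        this.nanoTime = nanoTime;
        this.deadlineNanos = nanoTime.getAsLong() + budgetNanos;
      }

      boolean shouldExit() {
        return nanoTime.getAsLong() > deadlineNanos;
      }

      public static void main(String[] args) {
        long[] fakeNanos = {0L};
        InjectableClockSketch limit = new InjectableClockSketch(() -> fakeNanos[0], 100);
        System.out.println(limit.shouldExit()); // false
        fakeNanos[0] = 101; // advance the fake clock deterministically
        System.out.println(limit.shouldExit()); // true
      }
    }
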
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
similarity index 55%
copy from solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
copy to solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
index 274994099f1..c3c5d72548e 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
+++ b/solr/core/src/test/org/apache/solr/search/ComponentStageLimitsTest.java
@@ -16,11 +16,11 @@
*/
package org.apache.solr.search;
+import static org.apache.solr.response.SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY;
+
import java.lang.invoke.MethodHandles;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
@@ -35,7 +35,7 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestCpuAllowedLimit extends SolrCloudTestCase {
+public class ComponentStageLimitsTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String COLLECTION = "test";
@@ -65,6 +65,9 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
@BeforeClass
public static void setupClass() throws Exception {
+ // Note: copied from TestCpuAllowedLimit.java, segments not likely important
+ // at the moment, but setup retained for now.
+
// Using NoMergePolicy and 100 commits we should get 100 segments (across all shards).
// At this point of writing MAX_SEGMENTS_PER_SLICE in lucene is 5, so we should be
// ensured that any multithreaded testing will create 20 executable tasks for the
@@ -91,42 +94,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
}
@Test
- public void testCompareToWallClock() throws Exception {
+ public void testLimitPrepare() throws Exception {
Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
- long limitMs = 100;
- CpuAllowedLimit cpuLimit = new CpuAllowedLimit(limitMs);
- int[] randoms = new int[100];
- long startNs = System.nanoTime();
- int wakeups = 0;
- while (!cpuLimit.shouldExit()) {
- Thread.sleep(1);
- // do some busywork
- for (int i = 0; i < randoms.length; i++) {
- randoms[i] = random().nextInt();
- }
- wakeups++;
- }
- long endNs = System.nanoTime();
- long wallTimeDeltaMs = TimeUnit.MILLISECONDS.convert(endNs - startNs, TimeUnit.NANOSECONDS);
- log.info(
- "CPU limit: {} ms, elapsed wall-clock: {} ms, wakeups: {}",
- limitMs,
- wallTimeDeltaMs,
- wakeups);
- assertTrue(
- "Elapsed wall-clock time expected much larger than 100ms but was " +
wallTimeDeltaMs,
- limitMs < wallTimeDeltaMs);
- }
-
- @Test
- public void testDistribLimit() throws Exception {
- Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
-
SolrClient solrClient = cluster.getSolrClient();
-
- // no limits set - should eventually complete
- log.info("--- No limits, full results ---");
long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms prepare phase - partial results ---");
QueryResponse rsp =
solrClient.query(
COLLECTION,
@@ -138,18 +110,28 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "prepare",
+ "timeAllowed",
+ "500"));
+ System.err.println("rsp=" + rsp.jsonStr());
assertEquals(rsp.getHeader().get("status"), 0);
- Number qtime = (Number) rsp.getHeader().get("QTime");
- assertTrue("QTime expected " + qtime + " >> " + sleepMs, qtime.longValue()
> sleepMs);
- assertNull("should not have partial results",
rsp.getHeader().get("partialResults"));
- TestInjection.measureCpu();
- // 25 ms per 5 segments ~175ms each shard
- TestInjection.cpuTimerDelayInjectedNS = new AtomicInteger(25_000_000);
- // timeAllowed set, should return partial results
- log.info("--- timeAllowed, partial results ---");
- rsp =
+ assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
+ String details = (String)
rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[prepare]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
+ }
+
+ @Test
+ public void testLimitProcess() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ String msg = "--- 500ms limit, 1000ms process phase - partial results ---";
+ log.info(msg);
+ System.out.println(msg);
+ int sleepMs = 2000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
@@ -160,16 +142,28 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "multiThreaded",
- "false",
+ "process",
"timeAllowed",
- "500"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertEquals("should have partial results", true,
rsp.getHeader().get("partialResults"));
+ "1000"));
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ System.out.println("details=" + details);
+ assertNotNull(details);
+ assertTrue(details.contains("[process]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
+ assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
+ }
- log.info("--- timeAllowed, partial results, multithreading ---");
- rsp =
+ @Test
+ public void testLimitFinish() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms finish phase - partial results ---");
+ sleepMs = 1000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
@@ -180,95 +174,47 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
ExpensiveSearchComponent.SLEEP_MS_PARAM,
String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "multiThreaded",
- "true",
+ "finish",
"timeAllowed",
"500"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
-
- // cpuAllowed set with large value, should return full results
- log.info("--- cpuAllowed, full results ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "10000"));
- // System.err.println("rsp=" + rsp.jsonStr());
- assertNull("should have full results",
rsp.getHeader().get("partialResults"));
- // cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 1, partial results ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "multiThreaded",
- "false"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[finishStage stage:PARSE_QUERY]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
- log.info("--- cpuAllowed 1, partial results omitted ---");
- rsp =
- solrClient.query(
- COLLECTION,
- params(
- "q",
- "id:*",
- "sort",
- "id desc",
- "stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "partialResults",
- "false",
- "multiThreaded",
- "false",
- "_",
- "foo"));
- String s = rsp.jsonStr();
- System.err.println("rsp=" + s);
- assertEquals("should have partial results", "omitted",
rsp.getHeader().get("partialResults"));
+ }
- // cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 2, partial results, multi-threaded ---");
- rsp =
+ @Test
+ public void testLimitDistrib() throws Exception {
+ Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+ SolrClient solrClient = cluster.getSolrClient();
+ long sleepMs = 1000;
+ log.info("--- 500ms limit, 1000ms distrib phase - partial results ---");
+ sleepMs = 1000;
+ QueryResponse rsp =
solrClient.query(
COLLECTION,
params(
"q",
"id:*",
"sort",
- "id desc",
+ "id asc",
+ ExpensiveSearchComponent.SLEEP_MS_PARAM,
+ String.valueOf(sleepMs),
"stages",
- "prepare,process",
- "cpuAllowed",
- "100",
- "multiThreaded",
- "true"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "distrib",
+ "timeAllowed",
+ "500"));
+ System.err.println("rsp=" + rsp.jsonStr());
+ assertEquals(rsp.getHeader().get("status"), 0);
+ String details = (String) rsp.getHeader().get(RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY);
+ assertNotNull(details);
+ assertTrue(details.contains("[distrib]"));
+ assertTrue(
+ details.contains("exceeded prior to query in
[expensiveSearchComponent, query, facet,"));
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
}
-
- @Test
- public void testDistribLimit2() throws Exception {
- // This looks silly, but it actually guards against:
- // https://issues.apache.org/jira/browse/SOLR-17203
- testDistribLimit();
- }
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
index 274994099f1..4611de10eca 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
+++ b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
@@ -169,6 +169,7 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
assertEquals("should have partial results", true,
rsp.getHeader().get("partialResults"));
log.info("--- timeAllowed, partial results, multithreading ---");
+
rsp =
solrClient.query(
COLLECTION,
@@ -201,11 +202,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"stages",
"prepare,process",
"cpuAllowed",
- "10000"));
- // System.err.println("rsp=" + rsp.jsonStr());
+ "100000"));
+ System.err.println("rsp=" + rsp.jsonStr());
assertNull("should have full results",
rsp.getHeader().get("partialResults"));
// cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 1, partial results ---");
+ log.info("--- cpuAllowed 100, partial results ---");
rsp =
solrClient.query(
COLLECTION,
@@ -222,7 +223,7 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"false"));
// System.err.println("rsp=" + rsp.jsonStr());
assertNotNull("should have partial results",
rsp.getHeader().get("partialResults"));
- log.info("--- cpuAllowed 1, partial results omitted ---");
+ log.info("--- cpuAllowed 100, partial results omitted ---");
rsp =
solrClient.query(
COLLECTION,
@@ -243,10 +244,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
"foo"));
String s = rsp.jsonStr();
System.err.println("rsp=" + s);
- assertEquals("should have partial results", "omitted",
rsp.getHeader().get("partialResults"));
+ assertEquals(
+ "should not have partial results", "omitted",
rsp.getHeader().get("partialResults"));
// cpuAllowed set, should return partial results
- log.info("--- cpuAllowed 2, partial results, multi-threaded ---");
+ log.info("--- cpuAllowed 100, partial results, multi-threaded ---");
rsp =
solrClient.query(
COLLECTION,
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
index 4305809f23d..1128c086460 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java
@@ -293,9 +293,12 @@ public abstract class LBSolrClient extends SolrClient {
suffix = ":" + zombieServers.keySet();
}
// Skipping check time exceeded for the first request
- if (numServersTried > 0 && isTimeExceeded(timeAllowedNano, timeOutTime)) {
+ // Ugly string-based hack, but a "no live servers" message here is VERY misleading :(
+ if ((previousEx != null && previousEx.getMessage().contains("Limits exceeded!"))
+ || (numServersTried > 0 && isTimeExceeded(timeAllowedNano, timeOutTime))) {
throw new SolrServerException(
- "Time allowed to handle this request exceeded" + suffix,
previousEx);
+ "The processing limits for to this request were exceeded, see
cause for details",
+ previousEx);
}
if (serverStr == null) {
throw new SolrServerException(