This is an automated email from the ASF dual-hosted git repository.
stillalex pushed a commit to branch branch_9_4
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9_4 by this push:
new 79bce121a33 SOLR-17060 CoreContainer#create may deadlock with
concurrent requests for metrics (#2101)
79bce121a33 is described below
commit 79bce121a33397cd2bffa6e81c24fe4ae6d2e8f3
Author: Alex D <[email protected]>
AuthorDate: Tue Dec 19 12:48:36 2023 -0800
SOLR-17060 CoreContainer#create may deadlock with concurrent requests for
metrics (#2101)
---
solr/CHANGES.txt | 2 +
.../src/java/org/apache/solr/core/SolrCore.java | 17 ++++--
.../test/org/apache/solr/core/SolrCoreTest.java | 60 ++++++++++++++++++++++
.../solr/handler/admin/MetricsHandlerTest.java | 4 ++
4 files changed, 78 insertions(+), 5 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1352897c332..638f498a7d3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -15,6 +15,8 @@ Bug Fixes
* SOLR-6853: Allow '/' characters in the text managed by Managed Resources
API. (Nikita Rusetskii via Eric Pugh)
+* SOLR-17060: CoreContainer#create may deadlock with concurrent requests for
metrics (Alex Deparvu, David Smiley)
+
Dependency Upgrades
---------------------
(No changes)
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java
b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index cb741cc97b6..e4709673c79 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -259,6 +259,7 @@ public class SolrCore implements SolrInfoBean, Closeable {
private Counter newSearcherCounter;
private Counter newSearcherMaxReachedCounter;
private Counter newSearcherOtherErrorsCounter;
+ private volatile boolean newSearcherReady = false;
private final String metricTag = SolrMetricProducer.getUniqueMetricTag(this,
null);
private final SolrMetricsContext solrMetricsContext;
@@ -1357,14 +1358,14 @@ public class SolrCore implements SolrInfoBean,
Closeable {
"sizeInBytes",
Category.INDEX.toString());
parentContext.gauge(
- () -> isClosed() ? parentContext.nullNumber() : getSegmentCount(),
+ () -> isClosed() ? parentContext.nullString() :
NumberUtils.readableSize(getIndexSize()),
true,
- "segments",
+ "size",
Category.INDEX.toString());
parentContext.gauge(
- () -> isClosed() ? parentContext.nullString() :
NumberUtils.readableSize(getIndexSize()),
+ () -> isReady() ? getSegmentCount() : parentContext.nullNumber(),
true,
- "size",
+ "segments",
Category.INDEX.toString());
final CloudDescriptor cd = getCoreDescriptor().getCloudDescriptor();
@@ -1923,6 +1924,11 @@ public class SolrCore implements SolrInfoBean, Closeable
{
return refCount.get() <= 0;
}
+ /** Returns true if the core is ready for use. It is not initializing or
closing/closed. */
+ public boolean isReady() {
+ return !isClosed() && newSearcherReady;
+ }
+
private Collection<CloseHook> closeHooks = null;
/** Add a close callback hook */
@@ -2131,6 +2137,7 @@ public class SolrCore implements SolrInfoBean, Closeable {
* because it might be closed soon after this method returns; it really
depends.
*/
public <R> R withSearcher(IOFunction<SolrIndexSearcher, R> lambda) throws
IOException {
+ assert isReady();
final RefCounted<SolrIndexSearcher> refCounted = getSearcher();
try {
return lambda.apply(refCounted.get());
@@ -2728,7 +2735,6 @@ public class SolrCore implements SolrInfoBean, Closeable {
if (!success) {
newSearcherOtherErrorsCounter.inc();
- ;
synchronized (searcherLock) {
onDeckSearchers--;
@@ -2758,6 +2764,7 @@ public class SolrCore implements SolrInfoBean, Closeable {
// we want to do this after we decrement onDeckSearchers so another
thread
// doesn't increment first and throw a false warning.
openSearcherLock.unlock();
+ newSearcherReady = true;
}
}
diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
index e401bcc695a..28cfbc70cae 100644
--- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
+++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
@@ -16,6 +16,8 @@
*/
package org.apache.solr.core;
+import com.codahale.metrics.Gauge;
+import com.codahale.metrics.MetricFilter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -24,6 +26,7 @@ import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ExecutorUtil;
@@ -32,6 +35,7 @@ import org.apache.solr.handler.ReplicationHandler;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.SpellCheckComponent;
+import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
@@ -327,6 +331,62 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
assertTrue(core.areAllSearcherReferencesEmpty());
}
+ /**
+ * Best effort attempt to recreate a deadlock between SolrCore
initialization and Index metrics
+ * poll.
+ *
+ * <p>See https://issues.apache.org/jira/browse/SOLR-17060
+ */
+ @Test
+ public void testCoreInitDeadlockMetrics() throws Exception {
+ SolrMetricManager metricManager = h.getCoreContainer().getMetricManager();
+ CoreContainer coreContainer = h.getCoreContainer();
+
+ String coreName = "tmpCore";
+ AtomicBoolean created = new AtomicBoolean(false);
+ AtomicBoolean atLeastOnePoll = new AtomicBoolean(false);
+
+ final ExecutorService executor =
+ ExecutorUtil.newMDCAwareFixedThreadPool(
+ 1, new SolrNamedThreadFactory("testCoreInitDeadlockMetrics"));
+ executor.submit(
+ () -> {
+ while (!created.get()) {
+ var metrics =
+ metricManager.getMetrics(
+ "solr.core." + coreName,
+
MetricFilter.startsWith(SolrInfoBean.Category.INDEX.toString()));
+ for (var m : metrics.values()) {
+ if (m instanceof Gauge) {
+ var v = ((Gauge<?>) m).getValue();
+ atLeastOnePoll.compareAndSet(false, v != null);
+ }
+ }
+
+ try {
+ TimeUnit.MILLISECONDS.sleep(5);
+ } catch (InterruptedException e1) {
+ throw new RuntimeException(e1);
+ }
+ }
+ });
+
+ TimeUnit.MILLISECONDS.sleep(25);
+ try (var tmpCore = coreContainer.create(coreName, Map.of("configSet",
"minimal"))) {
+ tmpCore.open();
+ for (int i = 0; i < 10; i++) {
+ TimeUnit.MILLISECONDS.sleep(50); // to allow metrics to be checked at
least once
+ if (atLeastOnePoll.get()) {
+ break;
+ }
+ }
+ } finally {
+ created.set(true);
+ ExecutorUtil.shutdownAndAwaitTermination(executor);
+ }
+ assertTrue(atLeastOnePoll.get());
+ }
+
private static class NewSearcherRunnable implements Runnable {
private final SolrCore core;
diff --git
a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
index 1f4697ecc10..c2ccdb43baf 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
@@ -93,6 +93,10 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 {
// response wasn't serialized, so we get here whatever MetricUtils
produced instead of NamedList
assertNotNull(((MapWriter) o)._get("count", null));
assertEquals(0L, ((MapWriter) nl.get("SEARCHER.new.errors"))._get("count",
null));
+ assertNotNull(nl.get("INDEX.segments")); // int gauge
+ assertTrue((int) ((MapWriter) nl.get("INDEX.segments"))._get("value",
null) >= 0);
+ assertNotNull(nl.get("INDEX.sizeInBytes")); // long gauge
+ assertTrue((long) ((MapWriter) nl.get("INDEX.sizeInBytes"))._get("value",
null) >= 0);
nl = (NamedList<?>) values.get("solr.node");
assertNotNull(nl.get("CONTAINER.cores.loaded")); // int gauge
assertEquals(1, ((MapWriter)
nl.get("CONTAINER.cores.loaded"))._get("value", null));