This is an automated email from the ASF dual-hosted git repository.
stillalex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 7e492564e4a SOLR-17011 Add tracing spans to internal collection
commands (#1979)
7e492564e4a is described below
commit 7e492564e4a2344fc613e97f5f22efa60b125190
Author: Alex D <[email protected]>
AuthorDate: Tue Oct 24 14:02:30 2023 -0700
SOLR-17011 Add tracing spans to internal collection commands (#1979)
Added tracing spans to internal collection commands. Made replication trace
aware so any replication that is triggered as a result of a collection command
will correctly reflect the trace id
---
solr/CHANGES.txt | 2 +
.../solr/cloud/api/collections/CollApiCmds.java | 51 ++++++++-
.../java/org/apache/solr/handler/IndexFetcher.java | 9 +-
.../org/apache/solr/update/UpdateShardHandler.java | 9 --
.../org/apache/solr/util/tracing/TraceUtils.java | 9 ++
.../TestSimplePropagatorDistributedTracing.java | 17 +++
.../solr/opentelemetry/TestDistributedTracing.java | 123 ++++++++++++++++++---
7 files changed, 193 insertions(+), 27 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 83389922a52..a95d1e4fced 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -20,6 +20,8 @@ Improvements
* SOLR-16536: Replace OpenTracing instrumentation with OpenTelemetry (Alex
Deparvu, janhoy)
+* SOLR-17011: Add tracing spans to internal collection commands (Alex Deparvu)
+
Optimizations
---------------------
(No changes)
diff --git
a/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java
b/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java
index 1595904bc4f..5d3d18795ee 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CollApiCmds.java
@@ -69,10 +69,13 @@ import static
org.apache.solr.common.params.CollectionParams.CollectionAction.SP
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
+import io.opentelemetry.api.trace.Span;
+import io.opentelemetry.context.Context;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
+import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.solr.cloud.DistributedClusterStateUpdater;
@@ -94,6 +97,7 @@ import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.util.tracing.TraceUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -183,7 +187,52 @@ public class CollApiCmds {
}
CollApiCmds.CollectionApiCommand
getActionCommand(CollectionParams.CollectionAction action) {
- return commandMap.get(action);
+ var command = commandMap.get(action);
+ if (command != null) {
+ return new TraceAwareCommand(commandMap.get(action));
+ } else {
+ return command;
+ }
+ }
+ }
+
+ public static class TraceAwareCommand implements CollectionApiCommand {
+
+ private final CollectionApiCommand command;
+ private final Context ctx =
+ TraceUtils.extractContext(null); // allows trace id to be generated if
missing
+
+ public TraceAwareCommand(CollectionApiCommand command) {
+ this.command = command;
+ }
+
+ @Override
+ public void call(ClusterState state, ZkNodeProps message,
NamedList<Object> results)
+ throws Exception {
+ final Span localSpan;
+ final Context localContext;
+ if (Span.current().isRecording()) {
+ localSpan = null;
+ localContext = ctx;
+ } else {
+ String collection =
+ Optional.ofNullable(message.getStr(COLLECTION_PROP,
message.getStr(NAME)))
+ .orElse("unknown");
+ boolean isAsync = message.containsKey(ASYNC);
+ localSpan =
+ TraceUtils.startCollectionApiCommandSpan(
+ command.getClass().getSimpleName(), collection, isAsync);
+ localContext = ctx.with(localSpan);
+ }
+
+ try (var scope = localContext.makeCurrent()) {
+ assert scope != null; // prevent javac warning about scope being unused
+ command.call(state, message, results);
+ } finally {
+ if (localSpan != null) {
+ localSpan.end();
+ }
+ }
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 3015afb6900..1df9d1e101c 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -32,7 +32,6 @@ import static
org.apache.solr.handler.ReplicationHandler.EXTERNAL;
import static org.apache.solr.handler.ReplicationHandler.FETCH_FROM_LEADER;
import static org.apache.solr.handler.ReplicationHandler.FILE;
import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
-import static org.apache.solr.handler.ReplicationHandler.FileInfo;
import static org.apache.solr.handler.ReplicationHandler.GENERATION;
import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
import static org.apache.solr.handler.ReplicationHandler.LEADER_URL;
@@ -121,6 +120,7 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ReplicationHandler.FileInfo;
import org.apache.solr.handler.admin.api.CoreReplicationAPI;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@@ -132,6 +132,7 @@ import org.apache.solr.util.PropertiesOutputStream;
import org.apache.solr.util.RTimer;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
+import org.apache.solr.util.stats.InstrumentedHttpRequestExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -265,11 +266,13 @@ public class IndexFetcher {
httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_USER,
httpBasicAuthUser);
httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_PASS,
httpBasicAuthPassword);
httpClientParams.set(HttpClientUtil.PROP_ALLOW_COMPRESSION,
useCompression);
-
+ // no metrics, just tracing
+ InstrumentedHttpRequestExecutor executor = new
InstrumentedHttpRequestExecutor(null);
return HttpClientUtil.createClient(
httpClientParams,
core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyConnectionManager(),
- true);
+ true,
+ executor);
}
public IndexFetcher(
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
index 3a52fe87fb9..b3ab8cb9156 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
@@ -21,7 +21,6 @@ import static
org.apache.solr.util.stats.InstrumentedHttpRequestExecutor.KNOWN_M
import com.google.common.annotations.VisibleForTesting;
import java.lang.invoke.MethodHandles;
import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
@@ -80,8 +79,6 @@ public class UpdateShardHandler implements SolrInfoBean {
private final CloseableHttpClient defaultClient;
- private final InstrumentedPoolingHttpClientConnectionManager
updateOnlyConnectionManager;
-
private final InstrumentedPoolingHttpClientConnectionManager
recoveryOnlyConnectionManager;
private final InstrumentedPoolingHttpClientConnectionManager
defaultConnectionManager;
@@ -90,16 +87,12 @@ public class UpdateShardHandler implements SolrInfoBean {
private final InstrumentedHttpListenerFactory updateHttpListenerFactory;
- private final Set<String> metricNames = ConcurrentHashMap.newKeySet();
private SolrMetricsContext solrMetricsContext;
private int socketTimeout = HttpClientUtil.DEFAULT_SO_TIMEOUT;
private int connectionTimeout = HttpClientUtil.DEFAULT_CONNECT_TIMEOUT;
public UpdateShardHandler(UpdateShardHandlerConfig cfg) {
- updateOnlyConnectionManager =
- new InstrumentedPoolingHttpClientConnectionManager(
-
HttpClientUtil.getSocketFactoryRegistryProvider().getSocketFactoryRegistry());
recoveryOnlyConnectionManager =
new InstrumentedPoolingHttpClientConnectionManager(
HttpClientUtil.getSocketFactoryRegistryProvider().getSocketFactoryRegistry());
@@ -108,8 +101,6 @@ public class UpdateShardHandler implements SolrInfoBean {
HttpClientUtil.getSocketFactoryRegistryProvider().getSocketFactoryRegistry());
ModifiableSolrParams clientParams = new ModifiableSolrParams();
if (cfg != null) {
- updateOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
-
updateOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
recoveryOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
recoveryOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
defaultConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
diff --git a/solr/core/src/java/org/apache/solr/util/tracing/TraceUtils.java
b/solr/core/src/java/org/apache/solr/util/tracing/TraceUtils.java
index a153ae82bb6..63f8cd0d04c 100644
--- a/solr/core/src/java/org/apache/solr/util/tracing/TraceUtils.java
+++ b/solr/core/src/java/org/apache/solr/util/tracing/TraceUtils.java
@@ -185,4 +185,13 @@ public class TraceUtils {
req.getSpan().setAttribute(TAG_CLASS, clazz);
}
}
+
+ public static Span startCollectionApiCommandSpan(
+ String name, String collection, boolean isAsync) {
+ Tracer tracer = getGlobalTracer();
+ SpanKind kind = isAsync ? SpanKind.PRODUCER : SpanKind.CLIENT;
+ SpanBuilder spanBuilder =
+ tracer.spanBuilder(name).setSpanKind(kind).setAttribute(TAG_DB,
collection);
+ return spanBuilder.startSpan();
+ }
}
diff --git
a/solr/core/src/test/org/apache/solr/util/tracing/TestSimplePropagatorDistributedTracing.java
b/solr/core/src/test/org/apache/solr/util/tracing/TestSimplePropagatorDistributedTracing.java
index 86693edf12d..36a4788d93e 100644
---
a/solr/core/src/test/org/apache/solr/util/tracing/TestSimplePropagatorDistributedTracing.java
+++
b/solr/core/src/test/org/apache/solr/util/tracing/TestSimplePropagatorDistributedTracing.java
@@ -31,9 +31,11 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.admin.CoreAdminOperation;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.update.processor.LogUpdateProcessorFactory;
import org.apache.solr.util.LogListener;
@@ -181,4 +183,19 @@ public class TestSimplePropagatorDistributedTracing
extends SolrCloudTestCase {
client.connect();
return client;
}
+
+ @Test
+ public void testInternalCollectionApiCommands() throws Exception {
+ String collecton = "testInternalCollectionApiCommands";
+ verifyCollectionCreation(collecton);
+ }
+
+ private void verifyCollectionCreation(String collection) throws Exception {
+ try (LogListener reqLog =
LogListener.info(CoreAdminOperation.class.getName())) {
+ var a1 = CollectionAdminRequest.createCollection(collection, 2, 2);
+ CollectionAdminResponse r1 = a1.process(cluster.getSolrClient());
+ assertEquals(0, r1.getStatus());
+ assertSameTraceId(reqLog, null);
+ }
+ }
}
diff --git
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
index ab362068e2e..d16e60bcf17 100644
---
a/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
+++
b/solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java
@@ -24,7 +24,9 @@ import io.opentelemetry.sdk.trace.data.SpanData;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
@@ -38,6 +40,7 @@ import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.util.tracing.TraceUtils;
import org.junit.AfterClass;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -71,12 +74,15 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
CustomTestOtelTracerConfigurator.resetForTest();
}
+ @Before
+ private void resetSpanData() {
+ getAndClearSpans();
+ }
+
@Test
public void test() throws IOException, SolrServerException {
// TODO it would be clearer if we could compare the complete Span tree
between reality
// and what we assert it looks like in a structured visual way.
-
- getAndClearSpans(); // reset
CloudSolrClient cloudClient = cluster.getSolrClient();
// Indexing
@@ -90,7 +96,7 @@ public class TestDistributedTracing extends SolrCloudTestCase
{
assertOneSpanIsChildOfAnother(finishedSpans);
// core because cloudClient routes to core
assertEquals("post:/{core}/update", finishedSpans.get(0).getName());
- assertCoreName(finishedSpans.get(0));
+ assertCoreName(finishedSpans.get(0), COLLECTION);
cloudClient.add(COLLECTION, sdoc("id", "2"));
cloudClient.add(COLLECTION, sdoc("id", "3"));
@@ -116,12 +122,11 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
assertEquals(span.getTraceId(), parentTraceId);
}
assertEquals("get:/{core}/select", finishedSpans.get(0).getName());
- assertCoreName(finishedSpans.get(0));
+ assertCoreName(finishedSpans.get(0), COLLECTION);
}
@Test
public void testAdminApi() throws Exception {
- getAndClearSpans(); // reset
CloudSolrClient cloudClient = cluster.getSolrClient();
cloudClient.request(new GenericSolrRequest(SolrRequest.METHOD.GET,
"/admin/metrics"));
@@ -135,7 +140,6 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
@Test
public void testV2Api() throws Exception {
- getAndClearSpans(); // reset
CloudSolrClient cloudClient = cluster.getSolrClient();
new V2Request.Builder("/collections/" + COLLECTION + "/reload")
@@ -145,7 +149,7 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
.process(cloudClient);
var finishedSpans = getAndClearSpans();
assertEquals("post:/collections/{collection}/reload",
finishedSpans.get(0).getName());
- assertCollectionName(finishedSpans.get(0));
+ assertCollectionName(finishedSpans.get(0), COLLECTION);
new V2Request.Builder("/c/" + COLLECTION + "/update/json")
.withMethod(SolrRequest.METHOD.POST)
@@ -155,7 +159,7 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
.process(cloudClient);
finishedSpans = getAndClearSpans();
assertEquals("post:/c/{collection}/update/json",
finishedSpans.get(0).getName());
- assertCollectionName(finishedSpans.get(0));
+ assertCollectionName(finishedSpans.get(0), COLLECTION);
final V2Response v2Response =
new V2Request.Builder("/c/" + COLLECTION + "/select")
@@ -165,7 +169,7 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
.process(cloudClient);
finishedSpans = getAndClearSpans();
assertEquals("get:/c/{collection}/select", finishedSpans.get(0).getName());
- assertCollectionName(finishedSpans.get(0));
+ assertCollectionName(finishedSpans.get(0), COLLECTION);
assertEquals(1, ((SolrDocumentList)
v2Response.getResponse().get("response")).getNumFound());
}
@@ -176,7 +180,6 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
*/
@Test
public void testApacheClient() throws Exception {
- getAndClearSpans(); // reset
CollectionAdminRequest.ColStatus a1 =
CollectionAdminRequest.collectionStatus(COLLECTION);
CollectionAdminResponse r1 = a1.process(cluster.getSolrClient());
assertEquals(0, r1.getStatus());
@@ -191,16 +194,108 @@ public class TestDistributedTracing extends
SolrCloudTestCase {
}
}
+ @Test
+ public void testInternalCollectionApiCommands() throws Exception {
+ String collecton = "testInternalCollectionApiCommands";
+ verifyCollectionCreation(collecton);
+ verifyCollectionDeletion(collecton);
+ }
+
+ private void verifyCollectionCreation(String collection) throws Exception {
+ var a1 = CollectionAdminRequest.createCollection(collection, 2, 2);
+ CollectionAdminResponse r1 = a1.process(cluster.getSolrClient());
+ assertEquals(0, r1.getStatus());
+
+ // Expecting 8 spans:
+ // 1. api call "name=create:/admin/collections".
db.instance=testInternalCollectionApiCommands
+ // - unique traceId unrelated to the internal trace id generated for the
operation
+ // 2. internal CollectionApiCommand "name=CreateCollectionCmd"
+ // db.instance=testInternalCollectionApiCommands
+ // - this will be the parent span, all following spans will have the same
traceId
+ //
+ // 3..6 (4 times) name=post:/admin/cores
+ // db.instance=testInternalCollectionApiCommands_shard1_replica_n2
+ // db.instance=testInternalCollectionApiCommands_shard2_replica_n4
+ // db.instance=testInternalCollectionApiCommands_shard2_replica_n1
+ // db.instance=testInternalCollectionApiCommands_shard1_replica_n6
+ //
+ // 7..8 (2 times) name=post:/{core}/get
+ // db.instance=testInternalCollectionApiCommands_shard2_replica_n4
+ // db.instance=testInternalCollectionApiCommands_shard1_replica_n2
+
+ var finishedSpans = getAndClearSpans();
+ var s0 = finishedSpans.remove(0);
+ assertCollectionName(s0, collection);
+ assertEquals("create:/admin/collections", s0.getName());
+
+ Map<String, Integer> ops = new HashMap<>();
+ assertEquals(7, finishedSpans.size());
+ var parentTraceId = getRootTraceId(finishedSpans);
+ for (var span : finishedSpans) {
+ if (isRootSpan(span)) {
+ assertCollectionName(span, collection);
+ } else {
+ assertEquals(span.getParentSpanContext().getTraceId(), parentTraceId);
+ assertCoreName(span, collection);
+ }
+ assertEquals(span.getTraceId(), parentTraceId);
+ ops.put(span.getName(), ops.getOrDefault(span.getName(), 0) + 1);
+ }
+ var expectedOps =
+ Map.of("CreateCollectionCmd", 1, "post:/admin/cores", 4,
"post:/{core}/get", 2);
+ assertEquals(expectedOps, ops);
+ }
+
+ private void verifyCollectionDeletion(String collection) throws Exception {
+ var a1 = CollectionAdminRequest.deleteCollection(collection);
+ CollectionAdminResponse r1 = a1.process(cluster.getSolrClient());
+ assertEquals(0, r1.getStatus());
+
+ // Expecting 6 spans:
+ // 1. api call "name=delete:/admin/collections".
db.instance=testInternalCollectionApiCommands
+ // - unique traceId unrelated to the internal trace id generated for the
operation
+ // 2. internal CollectionApiCommand "name=DeleteCollectionCmd"
+ // db.instance=testInternalCollectionApiCommands
+ // - this will be the parent span, all following spans will have the same
traceId
+ //
+ // 3..6 (4 times) name=post:/admin/cores
+ // db.instance=testInternalCollectionApiCommands_shard2_replica_n1
+ // db.instance=testInternalCollectionApiCommands_shard1_replica_n2
+ // db.instance=testInternalCollectionApiCommands_shard2_replica_n4
+ // db.instance=testInternalCollectionApiCommands_shard1_replica_n6
+
+ var finishedSpans = getAndClearSpans();
+ var s0 = finishedSpans.remove(0);
+ assertCollectionName(s0, collection);
+ assertEquals("delete:/admin/collections", s0.getName());
+
+ Map<String, Integer> ops = new HashMap<>();
+ assertEquals(5, finishedSpans.size());
+ var parentTraceId = getRootTraceId(finishedSpans);
+ for (var span : finishedSpans) {
+ if (isRootSpan(span)) {
+ assertCollectionName(span, collection);
+ } else {
+ assertEquals(span.getParentSpanContext().getTraceId(), parentTraceId);
+ assertCoreName(span, collection);
+ }
+ assertEquals(span.getTraceId(), parentTraceId);
+ ops.put(span.getName(), ops.getOrDefault(span.getName(), 0) + 1);
+ }
+ var expectedOps = Map.of("DeleteCollectionCmd", 1, "post:/admin/cores", 4);
+ assertEquals(expectedOps, ops);
+ }
+
private static boolean isRootSpan(SpanData span) {
return !span.getParentSpanContext().isValid();
}
- private static void assertCollectionName(SpanData span) {
- assertEquals(COLLECTION, span.getAttributes().get(TraceUtils.TAG_DB));
+ private static void assertCollectionName(SpanData span, String collection) {
+ assertEquals(collection, span.getAttributes().get(TraceUtils.TAG_DB));
}
- private static void assertCoreName(SpanData span) {
-
assertTrue(span.getAttributes().get(TraceUtils.TAG_DB).startsWith(COLLECTION +
"_"));
+ private static void assertCoreName(SpanData span, String collection) {
+
assertTrue(span.getAttributes().get(TraceUtils.TAG_DB).startsWith(collection +
"_"));
}
private void assertOneSpanIsChildOfAnother(List<SpanData> finishedSpans) {