mlbiscoc commented on code in PR #3519: URL: https://github.com/apache/solr/pull/3519#discussion_r2305174004
########## solr/core/src/java/org/apache/solr/util/stats/OtelInstrumentedExecutorService.java: ########## @@ -0,0 +1,276 @@ +package org.apache.solr.util.stats; + +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; +import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer.MetricTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongUpDownCounter; + +/** + * OTEL instrumentation wrapper around {@link ExecutorService}. Based on {@link + * com.codahale.metrics.InstrumentedExecutorService}. + */ +public class OtelInstrumentedExecutorService implements ExecutorService { + public static final AttributeKey<String> NAME_ATTR = AttributeKey.stringKey("executor_name"); Review Comment: Maybe call it `EXECUTOR_NAME` since just `NAME` is too generic, I can imagine an actual `NAME` attribute existing one day. ########## solr/core/src/java/org/apache/solr/util/stats/OtelInstrumentedExecutorService.java: ########## @@ -0,0 +1,283 @@ +package org.apache.solr.util.stats; + +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; +import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer.MetricTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongUpDownCounter; + +/** + * OTEL instrumentation wrapper around {@link ExecutorService}. Based on {@link + * com.codahale.metrics.InstrumentedExecutorService}. + */ +public class OtelInstrumentedExecutorService implements ExecutorService { + public static final AttributeKey<String> NAME_ATTR = AttributeKey.stringKey("executor_name"); + + private final ExecutorService delegate; + private final AttributedLongCounter submitted; + private final AttributedLongUpDownCounter running; + private final AttributedLongCounter completed; + private final AttributedLongTimer idle; + private final AttributedLongTimer duration; + + public OtelInstrumentedExecutorService( + ExecutorService delegate, + SolrMetricsContext ctx, + SolrInfoBean.Category category, + String executorName) { + this.delegate = delegate; + + Attributes attrs = + Attributes.builder() + .put(CATEGORY_ATTR, category.toString()) + .put(NAME_ATTR, executorName) + .build(); + + // Each metric type needs a separate name to avoid obscuring other types + this.submitted = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "submitted").build()); + this.completed = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "completed").build()); Review Comment: Initialize the `ctx.longCounter` once then just pass it in ########## solr/core/src/java/org/apache/solr/core/CoreContainer.java: ########## @@ -853,14 +853,11 @@ private void loadInternal() { // NOCOMMIT: Do we need this for OTEL or is this specific for reporters? Review Comment: We can start removing these NOCOMMIT comments ########## solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java: ########## @@ -445,10 +445,7 @@ public void initializeMetrics( // TODO SOLR-17458: Add Otel Review Comment: Clean up this TODO ########## solr/core/src/java/org/apache/solr/util/stats/OtelInstrumentedExecutorService.java: ########## @@ -0,0 +1,283 @@ +package org.apache.solr.util.stats; + +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; +import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer.MetricTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongUpDownCounter; + +/** + * OTEL instrumentation wrapper around {@link ExecutorService}. Based on {@link + * com.codahale.metrics.InstrumentedExecutorService}. + */ +public class OtelInstrumentedExecutorService implements ExecutorService { + public static final AttributeKey<String> NAME_ATTR = AttributeKey.stringKey("executor_name"); + + private final ExecutorService delegate; + private final AttributedLongCounter submitted; + private final AttributedLongUpDownCounter running; + private final AttributedLongCounter completed; + private final AttributedLongTimer idle; + private final AttributedLongTimer duration; + + public OtelInstrumentedExecutorService( + ExecutorService delegate, + SolrMetricsContext ctx, + SolrInfoBean.Category category, + String executorName) { + this.delegate = delegate; + + Attributes attrs = + Attributes.builder() + .put(CATEGORY_ATTR, category.toString()) + .put(NAME_ATTR, executorName) + .build(); + + // Each metric type needs a separate name to avoid obscuring other types + this.submitted = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "submitted").build()); + this.completed = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "completed").build()); + this.running = + new AttributedLongUpDownCounter( + ctx.longUpDownCounter( + "solr_executor_tasks_active", "Number of running ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "running").build()); Review Comment: I'm not sure if this running should be an up/down counter or a gauge but I think its ok. I only every used them for cumulative sums which can also be negative but I could be wrong here. Regardless just put running in the name. No point in having the type attribute if its the only metric with the `type=running as there is nothing to aggregate on it for this metric. ########## solr/core/src/java/org/apache/solr/util/stats/OtelInstrumentedExecutorService.java: ########## @@ -0,0 +1,283 @@ +package org.apache.solr.util.stats; + +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; +import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer.MetricTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongUpDownCounter; + +/** + * OTEL instrumentation wrapper around {@link ExecutorService}. Based on {@link + * com.codahale.metrics.InstrumentedExecutorService}. + */ +public class OtelInstrumentedExecutorService implements ExecutorService { + public static final AttributeKey<String> NAME_ATTR = AttributeKey.stringKey("executor_name"); + + private final ExecutorService delegate; + private final AttributedLongCounter submitted; + private final AttributedLongUpDownCounter running; + private final AttributedLongCounter completed; + private final AttributedLongTimer idle; + private final AttributedLongTimer duration; + + public OtelInstrumentedExecutorService( + ExecutorService delegate, + SolrMetricsContext ctx, + SolrInfoBean.Category category, + String executorName) { + this.delegate = delegate; + + Attributes attrs = + Attributes.builder() + .put(CATEGORY_ATTR, category.toString()) + .put(NAME_ATTR, executorName) + .build(); + + // Each metric type needs a separate name to avoid obscuring other types + this.submitted = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "submitted").build()); + this.completed = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "completed").build()); + this.running = + new AttributedLongUpDownCounter( + ctx.longUpDownCounter( + "solr_executor_tasks_active", "Number of running ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "running").build()); + this.idle = + new AttributedLongTimer( + ctx.longHistogram( + "solr_executor_task_times", + "Timing of ExecutorService tasks", + OtelUnit.MILLISECONDS), + attrs.toBuilder().put(TYPE_ATTR, "idle").build()); + this.duration = + new AttributedLongTimer( + ctx.longHistogram( + "solr_executor_task_times", + "Timing of ExecutorService tasks", + OtelUnit.MILLISECONDS), + attrs.toBuilder().put(TYPE_ATTR, "duration").build()); Review Comment: Same as the comment on initializing once. ########## solr/core/src/java/org/apache/solr/util/stats/OtelInstrumentedExecutorService.java: ########## @@ -0,0 +1,283 @@ +package org.apache.solr.util.stats; + +import static org.apache.solr.metrics.SolrMetricProducer.CATEGORY_ATTR; +import static org.apache.solr.metrics.SolrMetricProducer.TYPE_ATTR; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.solr.core.SolrInfoBean; +import org.apache.solr.metrics.SolrMetricsContext; +import org.apache.solr.metrics.otel.OtelUnit; +import org.apache.solr.metrics.otel.instruments.AttributedLongCounter; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongTimer.MetricTimer; +import org.apache.solr.metrics.otel.instruments.AttributedLongUpDownCounter; + +/** + * OTEL instrumentation wrapper around {@link ExecutorService}. Based on {@link + * com.codahale.metrics.InstrumentedExecutorService}. + */ +public class OtelInstrumentedExecutorService implements ExecutorService { + public static final AttributeKey<String> NAME_ATTR = AttributeKey.stringKey("executor_name"); + + private final ExecutorService delegate; + private final AttributedLongCounter submitted; + private final AttributedLongUpDownCounter running; + private final AttributedLongCounter completed; + private final AttributedLongTimer idle; + private final AttributedLongTimer duration; + + public OtelInstrumentedExecutorService( + ExecutorService delegate, + SolrMetricsContext ctx, + SolrInfoBean.Category category, + String executorName) { + this.delegate = delegate; + + Attributes attrs = + Attributes.builder() + .put(CATEGORY_ATTR, category.toString()) + .put(NAME_ATTR, executorName) + .build(); + + // Each metric type needs a separate name to avoid obscuring other types + this.submitted = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "submitted").build()); + this.completed = + new AttributedLongCounter( + ctx.longCounter("solr_executor_tasks", "Number of ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "completed").build()); + this.running = + new AttributedLongUpDownCounter( + ctx.longUpDownCounter( + "solr_executor_tasks_active", "Number of running ExecutorService tasks"), + attrs.toBuilder().put(TYPE_ATTR, "running").build()); + this.idle = + new AttributedLongTimer( + ctx.longHistogram( + "solr_executor_task_times", + "Timing of ExecutorService tasks", + OtelUnit.MILLISECONDS), + attrs.toBuilder().put(TYPE_ATTR, "idle").build()); + this.duration = + new AttributedLongTimer( + ctx.longHistogram( + "solr_executor_task_times", + "Timing of ExecutorService tasks", + OtelUnit.MILLISECONDS), + attrs.toBuilder().put(TYPE_ATTR, "duration").build()); + + if (delegate instanceof ThreadPoolExecutor) { + ThreadPoolExecutor threadPool = (ThreadPoolExecutor) delegate; + ctx.observableLongGauge( + "solr_executor_thread_pool_size", + "Thread pool size", + measurement -> { + measurement.record( + threadPool.getPoolSize(), attrs.toBuilder().put(TYPE_ATTR, "size").build()); + measurement.record( + threadPool.getCorePoolSize(), attrs.toBuilder().put(TYPE_ATTR, "core").build()); + measurement.record( + threadPool.getMaximumPoolSize(), attrs.toBuilder().put(TYPE_ATTR, "max").build()); + }); + + final BlockingQueue<Runnable> taskQueue = threadPool.getQueue(); + ctx.observableLongGauge( + "solr_executor_thread_pool_tasks", + "Thread pool task counts", + measurement -> { + measurement.record( + threadPool.getActiveCount(), attrs.toBuilder().put(TYPE_ATTR, "active").build()); + measurement.record( + threadPool.getCompletedTaskCount(), + attrs.toBuilder().put(TYPE_ATTR, "completed").build()); + measurement.record( + taskQueue.size(), attrs.toBuilder().put(TYPE_ATTR, "queued").build()); + measurement.record( + taskQueue.remainingCapacity(), + attrs.toBuilder().put(TYPE_ATTR, "capacity").build()); + }); + } else if (delegate instanceof ForkJoinPool) { + ForkJoinPool forkJoinPool = (ForkJoinPool) delegate; + ctx.observableLongGauge( + "solr_executor_fork_join_pool_tasks", + "Fork join pool task counts", + measurement -> { + measurement.record( + forkJoinPool.getStealCount(), attrs.toBuilder().put(TYPE_ATTR, "stolen").build()); + measurement.record( + forkJoinPool.getQueuedTaskCount(), + attrs.toBuilder().put(TYPE_ATTR, "queued").build()); + }); + ctx.observableLongGauge( + "solr_executor_fork_join_pool_threads", + "Fork join pool thread counts", + measurement -> { + measurement.record( + forkJoinPool.getActiveThreadCount(), + attrs.toBuilder().put(TYPE_ATTR, "active").build()); + measurement.record( + forkJoinPool.getRunningThreadCount(), + attrs.toBuilder().put(TYPE_ATTR, "running").build()); + }); Review Comment: I just pushed a [commit](https://github.com/apache/solr/commit/5cc10d5b7241b1c1195a3088a8709c556e25f0b8) to start closing these observables. We should close these on `shutdown()` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org