This is an automated email from the ASF dual-hosted git repository.
ishan pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new 44eff083d84 SOLR-17937: Adding GPU details to the Admin UI (#3717)
44eff083d84 is described below
commit 44eff083d84285a89fce44ad839c398e3955bb42
Author: Puneet Ahuja <[email protected]>
AuthorDate: Fri Dec 12 20:12:47 2025 +0530
SOLR-17937: Adding GPU details to the Admin UI (#3717)
---
changelog/unreleased/GPU-info-admin-ui.yml | 6 +
gradle/libs.versions.toml | 2 +
.../java/org/apache/solr/core/CoreContainer.java | 60 ++++
.../solr/handler/admin/SystemInfoHandler.java | 47 +++
.../apache/solr/metrics/GpuMetricsProvider.java} | 24 +-
solr/modules/cuvs/build.gradle | 6 +-
.../org/apache/solr/cuvs/GpuMetricsService.java | 374 +++++++++++++++++++++
solr/webapp/web/css/angular/index.css | 7 +
solr/webapp/web/js/angular/controllers/index.js | 48 +++
solr/webapp/web/partials/index.html | 51 ++-
10 files changed, 607 insertions(+), 18 deletions(-)
diff --git a/changelog/unreleased/GPU-info-admin-ui.yml
b/changelog/unreleased/GPU-info-admin-ui.yml
new file mode 100644
index 00000000000..2546e3e8d6d
--- /dev/null
+++ b/changelog/unreleased/GPU-info-admin-ui.yml
@@ -0,0 +1,6 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Adding GPU details on the Admin UI using OpenTelemetry
+type: added
+authors:
+ - name: Puneet Ahuja
+
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 3a790309e31..d247eaa8014 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -74,6 +74,7 @@ commons-codec = "1.19.0"
commons-io = "2.20.0"
compose = "1.8.2"
cutterslade-analyze = "1.10.0"
+cuvs-java = "25.10.0"
cuvs-lucene = "25.10.0"
cybozulabs-langdetect = "1.1-20120112"
decompose = "3.3.0"
@@ -319,6 +320,7 @@ codehaus-woodstox-stax2api = { module =
"org.codehaus.woodstox:stax2-api", versi
commonscli-commonscli = { module = "commons-cli:commons-cli", version.ref =
"commons-cli" }
commonscodec-commonscodec = { module = "commons-codec:commons-codec",
version.ref = "commons-codec" }
commonsio-commonsio = { module = "commons-io:commons-io", version.ref =
"commons-io" }
+cuvs-java = { module = "com.nvidia.cuvs:cuvs-java", version.ref = "cuvs-java" }
cuvs-lucene = { module = "com.nvidia.cuvs.lucene:cuvs-lucene", version.ref =
"cuvs-lucene" }
cybozulabs-langdetect = { module = "com.cybozu.labs:langdetect", version.ref =
"cybozulabs-langdetect" }
decompose-decompose = { module = "com.arkivanov.decompose:decompose",
version.ref = "decompose" }
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index cccb6bd8f2e..01e40d9f0fb 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -130,6 +130,7 @@ import org.apache.solr.jersey.InjectionFactories;
import org.apache.solr.jersey.JerseyAppHandlerCache;
import org.apache.solr.logging.LogWatcher;
import org.apache.solr.logging.MDCLoggingContext;
+import org.apache.solr.metrics.GpuMetricsProvider;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricProducer;
import org.apache.solr.metrics.SolrMetricsContext;
@@ -778,6 +779,9 @@ public class CoreContainer {
solrMetricsContext = new SolrMetricsContext(metricManager, NODE_REGISTRY);
+ // Initialize GPU metrics service
+ initGpuMetricsService();
+
shardHandlerFactory =
ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader);
if (shardHandlerFactory instanceof SolrMetricProducer metricProducer) {
@@ -1154,6 +1158,59 @@ public class CoreContainer {
}
}
+  /**
+   * Looks up the optional cuVS {@code GpuMetricsService} by reflection and, when it is present,
+   * initializes it and registers its gauges under the node-level metrics context.
+   *
+   * <p>Reflection keeps solr-core free of a compile-time dependency on the cuVS module. A missing
+   * module is the expected case and is only logged at debug level. Every other failure is logged
+   * as a warning and never propagates, so GPU metrics cannot prevent the node from starting.
+   */
+  private void initGpuMetricsService() {
+    try {
+      // NOTE(review): Class.forName resolves against the class loader that defined CoreContainer.
+      // Confirm that module jars are visible there; otherwise resolve through the resource loader.
+      Class<?> serviceClass = Class.forName("org.apache.solr.cuvs.GpuMetricsService");
+      Object serviceObj = serviceClass.getMethod("getInstance").invoke(null);
+
+      if (serviceObj instanceof GpuMetricsProvider provider) {
+        // initialize(CoreContainer) is not part of GpuMetricsProvider, hence the reflective call.
+        serviceClass.getMethod("initialize", CoreContainer.class).invoke(serviceObj, this);
+        provider.initializeMetrics(
+            solrMetricsContext,
+            Attributes.builder()
+                .put(SolrMetricProducer.TYPE_ATTR, "gpu")
+                .put(SolrMetricProducer.CATEGORY_ATTR, "system")
+                .build());
+        log.info("GPU metrics service initialized");
+      }
+    } catch (ClassNotFoundException e) {
+      log.debug("cuVS module not available, GPU metrics will not be collected");
+    } catch (Exception | LinkageError e) {
+      // LinkageError (e.g. NoClassDefFoundError, UnsatisfiedLinkError) is an Error, not an
+      // Exception: it occurs when the module is present but its cuVS/native dependencies are not.
+      log.warn("Failed to initialize GPU metrics service", e);
+    }
+  }
+
+  /**
+   * Closes the optional cuVS GPU metrics service if the module is on the class path.
+   *
+   * <p>Runs as part of container shutdown, so no failure here may propagate: an escaping error
+   * would skip the remaining shutdown steps that follow this call.
+   */
+  private void shutdownGpuMetricsService() {
+    try {
+      Class<?> serviceClass = Class.forName("org.apache.solr.cuvs.GpuMetricsService");
+      Object serviceObj = serviceClass.getMethod("getInstance").invoke(null);
+
+      if (serviceObj instanceof GpuMetricsProvider provider) {
+        provider.close();
+        log.info("GPU metrics service shut down");
+      }
+    } catch (ClassNotFoundException e) {
+      // Expected when cuvs module is not available
+    } catch (Exception | LinkageError e) {
+      // LinkageError is not an Exception; catch it explicitly so a broken cuVS installation
+      // cannot abort the rest of the shutdown sequence.
+      log.warn("Failed to shutdown GPU metrics service", e);
+    }
+  }
+
+  /**
+   * Returns the GPU metrics provider contributed by the optional cuVS module.
+   *
+   * @return the provider singleton, or {@code null} when the module is not installed or the
+   *     provider could not be resolved
+   */
+  public GpuMetricsProvider getGpuMetricsProvider() {
+    try {
+      Class<?> serviceClass = Class.forName("org.apache.solr.cuvs.GpuMetricsService");
+      Object serviceObj = serviceClass.getMethod("getInstance").invoke(null);
+
+      if (serviceObj instanceof GpuMetricsProvider provider) {
+        return provider;
+      }
+    } catch (ClassNotFoundException e) {
+      // Module not available: the normal case on nodes without cuVS.
+    } catch (Exception | LinkageError e) {
+      // Unexpected, but this is called per request, so keep it at debug level to avoid log spam.
+      // LinkageError is included because it is an Error and would otherwise escape to callers
+      // that only guard against Exception.
+      log.debug("GPU metrics provider could not be resolved", e);
+    }
+    return null;
+  }
+
private volatile boolean isShutDown = false;
public boolean isShutDown() {
@@ -1219,6 +1276,9 @@ public class CoreContainer {
customThreadPool.execute(replayUpdatesExecutor::shutdownAndAwaitTermination);
+ // Shutdown GPU metrics service if it was initialized
+ shutdownGpuMetricsService();
+
if (metricManager != null) {
// Close all OTEL meter providers and metrics
metricManager.closeAllRegistries();
diff --git
a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
index ff7af6e45c0..16e78ab4268 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
@@ -55,6 +55,7 @@ import org.apache.solr.core.NodeConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.admin.api.NodeSystemInfoAPI;
+import org.apache.solr.metrics.GpuMetricsProvider;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
@@ -238,6 +239,8 @@ public class SystemInfoHandler extends RequestHandlerBase {
rsp.add("jvm", getJvmInfo(nodeConfig));
rsp.add("security", getSecurityInfo(req));
rsp.add("system", getSystemInfo());
+
+ rsp.add("gpu", getGpuInfo(req));
if (solrCloudMode) {
rsp.add("node", getCoreContainer(req).getZkController().getNodeName());
}
@@ -519,6 +522,50 @@ public class SystemInfoHandler extends RequestHandlerBase {
return Boolean.TRUE;
}
+  /**
+   * Builds the {@code gpu} section of the system info response.
+   *
+   * <p>The result always contains an {@code available} flag. When at least one GPU is reported it
+   * also contains {@code count}, a {@code memory} map ({@code total}/{@code used}/{@code free},
+   * only when the total is positive) and a {@code devices} map (only when non-empty).
+   *
+   * @param req the current request, used to reach the core container
+   * @return a map with exactly one {@code available} entry; never {@code null}
+   */
+  private SimpleOrderedMap<Object> getGpuInfo(SolrQueryRequest req) {
+    try {
+      GpuMetricsProvider provider = getCoreContainer(req).getGpuMetricsProvider();
+      if (provider == null) {
+        return gpuUnavailable();
+      }
+
+      long gpuCount = provider.getGpuCount();
+      if (gpuCount <= 0) {
+        return gpuUnavailable();
+      }
+
+      SimpleOrderedMap<Object> gpuInfo = new SimpleOrderedMap<>();
+      gpuInfo.add("available", true);
+      gpuInfo.add("count", gpuCount);
+
+      long gpuMemoryTotal = provider.getGpuMemoryTotal();
+      if (gpuMemoryTotal > 0) {
+        SimpleOrderedMap<Object> memory = new SimpleOrderedMap<>();
+        memory.add("total", gpuMemoryTotal);
+        memory.add("used", provider.getGpuMemoryUsed());
+        memory.add("free", provider.getGpuMemoryFree());
+        gpuInfo.add("memory", memory);
+      }
+
+      var devices = provider.getGpuDevices();
+      if (devices != null && !devices.isEmpty()) {
+        gpuInfo.add("devices", devices);
+      }
+      return gpuInfo;
+    } catch (Exception e) {
+      log.warn("Failed to get GPU information", e);
+      // Return a fresh map: appending to a partially filled one would leave both
+      // available=true and available=false in the response.
+      return gpuUnavailable();
+    }
+  }
+
+  /** Creates the response fragment used whenever no GPU information can be reported. */
+  private static SimpleOrderedMap<Object> gpuUnavailable() {
+    SimpleOrderedMap<Object> gpuInfo = new SimpleOrderedMap<>();
+    gpuInfo.add("available", false);
+    return gpuInfo;
+  }
+
@Override
public Name getPermissionName(AuthorizationContext request) {
return Name.CONFIG_READ_PERM;
diff --git a/solr/modules/cuvs/build.gradle
b/solr/core/src/java/org/apache/solr/metrics/GpuMetricsProvider.java
similarity index 64%
copy from solr/modules/cuvs/build.gradle
copy to solr/core/src/java/org/apache/solr/metrics/GpuMetricsProvider.java
index d00eed9b1a3..520d85a740f 100644
--- a/solr/modules/cuvs/build.gradle
+++ b/solr/core/src/java/org/apache/solr/metrics/GpuMetricsProvider.java
@@ -14,21 +14,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.solr.metrics;
-apply plugin: 'java-library'
+import java.util.Map;
-description = 'cuVS plugin'
+/**
+ * Read-only view of GPU state that solr-core can consume without depending on a GPU library.
+ *
+ * <p>In this change the only implementation is the cuVS module's {@code GpuMetricsService};
+ * {@code CoreContainer} obtains it reflectively and {@code SystemInfoHandler} reads it to build
+ * the {@code gpu} section of the system info response.
+ */
+public interface GpuMetricsProvider extends SolrMetricProducer {
-dependencies {
- implementation libs.cuvs.lucene
- implementation project(':solr:core')
- implementation project(':solr:solrj')
- implementation libs.apache.lucene.core
- implementation libs.slf4j.api
+ /** Returns the number of GPUs currently detected; callers treat values <= 0 as "no GPU". */
+ long getGpuCount();
- testImplementation project(':solr:test-framework')
- testImplementation libs.apache.lucene.testframework
- testImplementation libs.junit.junit
- testImplementation libs.commonsio.commonsio
-}
+ /** Returns the total GPU memory in bytes. */
+ long getGpuMemoryTotal();
+
+ /**
+  * Returns the used GPU memory in bytes. NOTE(review): the cuVS implementation only samples a
+  * single device, while the total is summed over all devices - confirm the intended scope.
+  */
+ long getGpuMemoryUsed();
+ /** Returns the free GPU memory in bytes; same single-device caveat as the used value. */
+ long getGpuMemoryFree();
+
+ /**
+  * Returns per-device details keyed by a device identifier. Callers in this change treat a
+  * {@code null} or empty map as "no device details".
+  */
+ Map<String, Object> getGpuDevices();
+}
diff --git a/solr/modules/cuvs/build.gradle b/solr/modules/cuvs/build.gradle
index d00eed9b1a3..624b2c94ae0 100644
--- a/solr/modules/cuvs/build.gradle
+++ b/solr/modules/cuvs/build.gradle
@@ -20,7 +20,11 @@ apply plugin: 'java-library'
description = 'cuVS plugin'
dependencies {
- implementation libs.cuvs.lucene
+ implementation(libs.cuvs.lucene) {
+ changing = true
+ }
+ implementation libs.cuvs.java
+ implementation libs.opentelemetry.api
implementation project(':solr:core')
implementation project(':solr:solrj')
implementation libs.apache.lucene.core
diff --git
a/solr/modules/cuvs/src/java/org/apache/solr/cuvs/GpuMetricsService.java
b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/GpuMetricsService.java
new file mode 100644
index 00000000000..9156a55841c
--- /dev/null
+++ b/solr/modules/cuvs/src/java/org/apache/solr/cuvs/GpuMetricsService.java
@@ -0,0 +1,374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cuvs;
+
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.CuVSResourcesInfo;
+import com.nvidia.cuvs.GPUInfo;
+import com.nvidia.cuvs.GPUInfoProvider;
+import com.nvidia.cuvs.spi.CuVSProvider;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.api.metrics.ObservableLongGauge;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.metrics.GpuMetricsProvider;
+import org.apache.solr.metrics.SolrMetricManager;
+import org.apache.solr.metrics.SolrMetricsContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Process-wide singleton that polls cuVS for GPU information on a background thread, caches the
+ * latest values in atomics and exposes them both as gauges and through
+ * {@link GpuMetricsProvider}.
+ */
+public class GpuMetricsService implements GpuMetricsProvider {
+
+ private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ // Lazily created by getInstance(), which is synchronized.
+ private static GpuMetricsService instance;
+
+ // Assigned in initialize(CoreContainer); initializeMetrics dereferences it.
+ private SolrMetricManager metricManager;
+ // Assigned in initializeMetrics; returned by getSolrMetricsContext().
+ private SolrMetricsContext metricsContext;
+ // Single-thread scheduler that runs updateGpuMetrics; created in startBackgroundService().
+ private ScheduledExecutorService scheduler;
+
+ // Gauge handles kept so that close() can release them.
+ private ObservableLongGauge gpuCountGauge;
+ private ObservableLongGauge gpuMemoryTotalGauge;
+ private ObservableLongGauge gpuMemoryUsedGauge;
+ private ObservableLongGauge gpuMemoryFreeGauge;
+
+ // Latest sampled values: written by the collector task, read by gauge callbacks and getters.
+ private final AtomicLong gpuCount = new AtomicLong(0);
+ private final AtomicLong gpuMemoryTotal = new AtomicLong(0);
+ private final AtomicLong gpuMemoryUsed = new AtomicLong(0);
+ private final AtomicLong gpuMemoryFree = new AtomicLong(0);
+ // Guards initialize(CoreContainer) so the background service is started only once.
+ private final AtomicBoolean initialized = new AtomicBoolean(false);
+ // Cleared by close(); the collector task returns immediately when it is false.
+ private final AtomicBoolean running = new AtomicBoolean(false);
+ // True once the per-device static properties have been captured.
+ private final AtomicBoolean staticDataInitialized = new AtomicBoolean(false);
+ // Device details keyed by "gpu_<id>"; each value is itself a ConcurrentHashMap of properties.
+ private final AtomicReference<ConcurrentHashMap<String, Object>> gpuDevices =
+ new AtomicReference<>(new ConcurrentHashMap<>());
+
+ // Private: instances are obtained through getInstance().
+ private GpuMetricsService() {}
+
+ public static synchronized GpuMetricsService getInstance() {
+ if (instance == null) {
+ instance = new GpuMetricsService();
+ }
+ return instance;
+ }
+
+ public void initialize(CoreContainer coreContainer) {
+ if (initialized.compareAndSet(false, true)) {
+ this.metricManager = coreContainer.getMetricManager();
+ startBackgroundService();
+ log.info("GPU metrics service initialized");
+ }
+ }
+
+ public void initialize(SolrCore core) {
+ initialize(core.getCoreContainer());
+ }
+
+ @Override
+ public void initializeMetrics(SolrMetricsContext parentContext, Attributes
attributes) {
+ this.metricsContext = parentContext;
+
+ gpuCountGauge =
+ metricManager.observableLongGauge(
+ parentContext.getRegistryName(),
+ "gpu.count",
+ "Number of available GPUs",
+ measurement -> measurement.record(gpuCount.get()),
+ null);
+
+ gpuMemoryTotalGauge =
+ metricManager.observableLongGauge(
+ parentContext.getRegistryName(),
+ "gpu.memory.total",
+ "Total GPU memory in bytes",
+ measurement -> measurement.record(gpuMemoryTotal.get()),
+ null);
+
+ gpuMemoryUsedGauge =
+ metricManager.observableLongGauge(
+ parentContext.getRegistryName(),
+ "gpu.memory.used",
+ "Used GPU memory in bytes",
+ measurement -> measurement.record(gpuMemoryUsed.get()),
+ null);
+
+ gpuMemoryFreeGauge =
+ metricManager.observableLongGauge(
+ parentContext.getRegistryName(),
+ "gpu.memory.free",
+ "Free GPU memory in bytes",
+ measurement -> measurement.record(gpuMemoryFree.get()),
+ null);
+
+ log.info("GPU metrics registered with OpenTelemetry");
+ }
+
+ /**
+  * Returns the context passed to the most recent {@code initializeMetrics} call, or {@code null}
+  * if metrics have not been registered yet.
+  */
+ @Override
+ public SolrMetricsContext getSolrMetricsContext() {
+ return metricsContext;
+ }
+
+  /**
+   * Creates the single-thread scheduler and schedules {@code updateGpuMetrics} to run immediately
+   * and then again 5 seconds after each run completes.
+   */
+  private void startBackgroundService() {
+    scheduler = Executors.newSingleThreadScheduledExecutor(this::newCollectorThread);
+
+    running.set(true);
+    scheduler.scheduleWithFixedDelay(this::updateGpuMetrics, 0, 5, TimeUnit.SECONDS);
+    log.info("GPU metrics background service started");
+  }
+
+  /** Builds the daemon thread that runs the collector, so it never blocks JVM exit. */
+  private Thread newCollectorThread(Runnable task) {
+    Thread collector = new Thread(task, "gpu-metrics-collector");
+    collector.setDaemon(true);
+    return collector;
+  }
+
+  /**
+   * Collector task: refreshes the cached GPU count, the static per-device details (until they
+   * have been captured successfully) and the dynamic memory figures.
+   *
+   * <p>Does nothing once the service has been closed. Any exception resets all cached values so
+   * that stale data is not reported.
+   */
+  private void updateGpuMetrics() {
+    if (!running.get()) {
+      return;
+    }
+
+    try {
+      GPUInfoProvider gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider();
+      List<GPUInfo> gpuObjects = gpuInfoProvider.availableGPUs();
+
+      gpuCount.set(gpuObjects.size());
+
+      if (gpuObjects.isEmpty()) {
+        resetDataMetrics();
+        staticDataInitialized.set(false);
+        return;
+      }
+
+      if (!staticDataInitialized.get()) {
+        updateStaticDeviceInfo(gpuObjects);
+        // updateStaticDeviceInfo logs and swallows its own failures and only publishes the device
+        // map on success. The map is always empty while the flag is false, so a non-empty map is
+        // the success signal; otherwise the capture is retried on the next run.
+        staticDataInitialized.set(!gpuDevices.get().isEmpty());
+      }
+      // Update dynamic memory metrics every time
+      updateDynamicMemoryMetrics(gpuInfoProvider);
+
+    } catch (Exception e) {
+      log.warn("Failed to update GPU metrics", e);
+      resetMetrics();
+    }
+  }
+
+  /** Clears every cached value, including the GPU count, and forces static data to be reloaded. */
+  private void resetMetrics() {
+    staticDataInitialized.set(false);
+    gpuCount.set(0);
+    resetDataMetrics();
+  }
+
+  /** Zeroes the cached memory figures and replaces the device map with an empty one. */
+  private void resetDataMetrics() {
+    gpuDevices.set(new ConcurrentHashMap<>());
+    gpuMemoryFree.set(0);
+    gpuMemoryUsed.set(0);
+    gpuMemoryTotal.set(0);
+  }
+
+  /**
+   * Samples the free memory reported by cuVS and applies it to the single "active" device; all
+   * other devices are flagged inactive. The aggregate used/free values mirror the active device.
+   *
+   * <p>When cuVS returns no current info the active device is reported as entirely free.
+   * Failures are logged and leave the previously cached aggregates untouched.
+   *
+   * @param gpuInfoProvider provider used to query the current resource info
+   */
+  private void updateDynamicMemoryMetrics(GPUInfoProvider gpuInfoProvider) {
+    try {
+      ConcurrentHashMap<String, Object> currentDevices = gpuDevices.get();
+      String activeKey = selectActiveDeviceKey(currentDevices);
+
+      // null means "no sample available"
+      Long sampledFree = null;
+      CuVSResources resources = CuVSResources.create();
+      try {
+        CuVSResourcesInfo currentInfo = gpuInfoProvider.getCurrentInfo(resources);
+        if (currentInfo != null) {
+          sampledFree = currentInfo.freeDeviceMemoryInBytes();
+        }
+      } finally {
+        resources.close();
+      }
+
+      long totalUsed = 0;
+      long totalFree = 0;
+
+      for (Map.Entry<String, Object> entry : currentDevices.entrySet()) {
+        @SuppressWarnings("unchecked")
+        ConcurrentHashMap<String, Object> device =
+            (ConcurrentHashMap<String, Object>) entry.getValue();
+
+        if (!entry.getKey().equals(activeKey)) {
+          // Inactive GPUs keep their static data only (names/IDs, no live memory figures)
+          device.put("active", false);
+          continue;
+        }
+
+        long deviceTotalMemory = (Long) device.get("totalMemory");
+        long deviceFreeMemory = sampledFree != null ? sampledFree : deviceTotalMemory;
+        long deviceUsedMemory = deviceTotalMemory - deviceFreeMemory;
+
+        device.put("usedMemory", deviceUsedMemory);
+        device.put("freeMemory", deviceFreeMemory);
+        device.put("active", true);
+
+        totalUsed = deviceUsedMemory;
+        totalFree = deviceFreeMemory;
+
+        if (sampledFree != null) {
+          log.debug(
+              "Updated active GPU {} memory: used={}, free={}, total={}",
+              activeKey,
+              deviceUsedMemory,
+              deviceFreeMemory,
+              deviceTotalMemory);
+        }
+      }
+
+      gpuMemoryUsed.set(totalUsed);
+      gpuMemoryFree.set(totalFree);
+
+    } catch (Throwable e) {
+      log.warn("Failed to update dynamic memory metrics", e);
+    }
+  }
+
+  /**
+   * Picks the device that receives the sampled memory figures. ConcurrentHashMap iteration order
+   * is unspecified, so "first key seen" is not a stable choice: prefer {@code gpu_0}, which is
+   * also the device flagged active when the static info is captured, and otherwise fall back to
+   * the smallest key. NOTE(review): assumes the sampled resources belong to device 0 - confirm.
+   *
+   * @return the chosen key, or {@code null} when there are no devices
+   */
+  private static String selectActiveDeviceKey(Map<String, Object> devices) {
+    if (devices.containsKey("gpu_0")) {
+      return "gpu_0";
+    }
+    return devices.keySet().stream().sorted().findFirst().orElse(null);
+  }
+
+  /**
+   * Captures the properties of every GPU that do not change at runtime and publishes them as the
+   * new device map, together with the summed total memory. Memory usage starts out as "all free"
+   * and only GPU 0 is flagged active. On failure nothing is published and a warning is logged.
+   *
+   * @param gpuObjects the GPUs reported by cuVS
+   */
+  private void updateStaticDeviceInfo(List<GPUInfo> gpuObjects) {
+    try {
+      ConcurrentHashMap<String, Object> byKey = new ConcurrentHashMap<>();
+      long memorySum = 0;
+
+      for (GPUInfo gpu : gpuObjects) {
+        int id = gpu.gpuId();
+        long deviceMemory = gpu.totalDeviceMemoryInBytes();
+        String capability = gpu.computeCapabilityMajor() + "." + gpu.computeCapabilityMinor();
+
+        ConcurrentHashMap<String, Object> details = new ConcurrentHashMap<>();
+        details.put("id", id);
+        details.put("name", gpu.name());
+        details.put("totalMemory", deviceMemory);
+        details.put("computeCapability", capability);
+        details.put("supportsConcurrentCopy", gpu.supportsConcurrentCopy());
+        details.put("supportsConcurrentKernels", gpu.supportsConcurrentKernels());
+        // Placeholders until the first dynamic sample arrives
+        details.put("usedMemory", 0L);
+        details.put("freeMemory", deviceMemory);
+        details.put("active", id == 0);
+
+        byKey.put("gpu_" + id, details);
+        memorySum += deviceMemory;
+      }
+
+      // Publish only after every device was read successfully
+      gpuDevices.set(byKey);
+      gpuMemoryTotal.set(memorySum);
+
+    } catch (Exception e) {
+      log.warn("Failed to update static device info", e);
+    }
+  }
+
+ /**
+  * Returns the live device map (not a copy), keyed by {@code gpu_<id>}; empty when no device
+  * details have been captured.
+  */
+ @Override
+ public Map<String, Object> getGpuDevices() {
+ return gpuDevices.get();
+ }
+
+ /** Returns the GPU count cached by the last collector run. */
+ @Override
+ public long getGpuCount() {
+ return gpuCount.get();
+ }
+
+ /** Returns the cached total memory in bytes, summed over all devices. */
+ @Override
+ public long getGpuMemoryTotal() {
+ return gpuMemoryTotal.get();
+ }
+
+ /** Returns the cached used memory in bytes; reflects the active device only. */
+ @Override
+ public long getGpuMemoryUsed() {
+ return gpuMemoryUsed.get();
+ }
+
+ /** Returns the cached free memory in bytes; reflects the active device only. */
+ @Override
+ public long getGpuMemoryFree() {
+ return gpuMemoryFree.get();
+ }
+
+  /**
+   * Stops background collection and releases the registered gauges.
+   *
+   * <p>The scheduler is stopped even if closing a gauge fails. The service is returned to its
+   * uninitialized state so that a later {@link #initialize(CoreContainer)} on this singleton
+   * (for example from a new container in the same JVM) starts collection again.
+   *
+   * @throws IOException if the inherited close logic fails
+   */
+  @Override
+  public void close() throws IOException {
+    running.set(false);
+
+    try {
+      closeGauge(gpuCountGauge);
+      closeGauge(gpuMemoryTotalGauge);
+      closeGauge(gpuMemoryUsedGauge);
+      closeGauge(gpuMemoryFreeGauge);
+    } finally {
+      gpuCountGauge = null;
+      gpuMemoryTotalGauge = null;
+      gpuMemoryUsedGauge = null;
+      gpuMemoryFreeGauge = null;
+
+      if (scheduler != null) {
+        scheduler.shutdownNow();
+        scheduler = null;
+        log.info("GPU metrics service shut down");
+      }
+
+      // Without this, initialize() would remain a no-op forever after the first close().
+      initialized.set(false);
+    }
+
+    GpuMetricsProvider.super.close();
+  }
+
+  /** Closes {@code gauge} when it has been registered; a {@code null} gauge is ignored. */
+  private static void closeGauge(ObservableLongGauge gauge) {
+    if (gauge != null) {
+      gauge.close();
+    }
+  }
+
+ /**
+  * Variant of {@link #close()} that does not declare a checked exception: an {@code IOException}
+  * is logged as a warning instead of being propagated.
+  */
+ public void shutdown() {
+ try {
+ close();
+ } catch (IOException e) {
+ log.warn("Error during shutdown", e);
+ }
+ }
+}
diff --git a/solr/webapp/web/css/angular/index.css
b/solr/webapp/web/css/angular/index.css
index 2f4c5394085..5b6471ff15b 100644
--- a/solr/webapp/web/css/angular/index.css
+++ b/solr/webapp/web/css/angular/index.css
@@ -235,3 +235,10 @@ limitations under the License.
#content #index #security h2 { background-image: url(
../../img/ico/prohibition.png ); }
#content #index #security div { text-align: right; }
+/* GPU section styling */
+#content #index #gpu .data li dt { width: 35%; }
+#content #index #gpu .data li dd { width: 65%; }
+#content #index #gpu .gpu-memory-bar-container {
+ margin-bottom: 15px;
+}
+
diff --git a/solr/webapp/web/js/angular/controllers/index.js
b/solr/webapp/web/js/angular/controllers/index.js
index e7b09683e8f..eda5ea86c28 100644
--- a/solr/webapp/web/js/angular/controllers/index.js
+++ b/solr/webapp/web/js/angular/controllers/index.js
@@ -60,6 +60,54 @@ solrAdminApp.controller('IndexController', function($scope,
System, Cores, Const
$scope.javaMemoryUsedDisplay =
pretty_print_bytes($scope.javaMemoryUsed); // @todo These should really be an
AngularJS Filter: {{ javaMemoryUsed | bytes }}
$scope.javaMemoryMax = pretty_print_bytes(javaMemoryMax);
+ // GPU
+ $scope.gpuAvailable = data.gpu && data.gpu.available;
+ if ($scope.gpuAvailable) {
+ $scope.gpuCount = data.gpu.count;
+
+ var devices = data.gpu.devices;
+ $scope.gpuDevices = [];
+ if (devices && Object.keys(devices).length > 0) {
+ var deviceKeys = Object.keys(devices);
+ var firstDevice = devices[deviceKeys[0]];
+ $scope.gpuName = firstDevice.name;
+ $scope.gpuId = firstDevice.id;
+ $scope.gpuCompute = firstDevice.computeCapability;
+
+ if (deviceKeys.length > 1) {
+ $scope.gpuName += " (+" + (deviceKeys.length - 1) + " more)";
+ }
+
+ for (var i = 0; i < deviceKeys.length; i++) {
+ var device = devices[deviceKeys[i]];
+ var gpuData = {
+ id: device.id,
+ name: device.name,
+ computeCapability: device.computeCapability,
+ totalMemory: device.totalMemory,
+ usedMemory: device.usedMemory,
+ freeMemory: device.freeMemory,
+ active: device.active
+ };
+
+ // Add "(active)" indicator to the name for active GPUs
+ if (gpuData.active) {
+ gpuData.name += " (active)";
+ }
+
+ // Only calculate memory display for active GPUs
+ if (gpuData.active && gpuData.totalMemory && gpuData.usedMemory) {
+ var total = parse_memory_value(gpuData.totalMemory);
+ var used = parse_memory_value(gpuData.usedMemory);
+ gpuData.memoryPercentage = (used / total * 100).toFixed(1) + "%";
+ gpuData.totalMemoryDisplay = pretty_print_bytes(total);
+ gpuData.usedMemoryDisplay = pretty_print_bytes(used);
+ }
+ $scope.gpuDevices.push(gpuData);
+ }
+ }
+ }
+
// no info bar:
$scope.noInfo = !(
data.system.totalPhysicalMemorySize &&
data.system.freePhysicalMemorySize &&
diff --git a/solr/webapp/web/partials/index.html
b/solr/webapp/web/partials/index.html
index d96b8228028..a417d7d5fc8 100644
--- a/solr/webapp/web/partials/index.html
+++ b/solr/webapp/web/partials/index.html
@@ -77,12 +77,12 @@ limitations under the License.
<dt><span>solr-spec</span></dt>
<dd>{{system.lucene["solr-spec-version"]}}</dd>
</dl></li>
-
+
<li class="solr_impl_version"><dl class="clearfix">
<dt class=""><span>solr-impl</span></dt>
<dd>{{system.lucene["solr-impl-version"]}}</dd>
</dl></li>
-
+
<li class="lucene lucene_spec_version"><dl class="clearfix">
<dt><span>lucene-spec</span></dt>
<dd>{{system.lucene["lucene-spec-version"]}}</dd>
@@ -92,7 +92,7 @@ limitations under the License.
<dt><span>lucene-impl</span></dt>
<dd>{{system.lucene["lucene-impl-version"]}}</dd>
</dl></li>
-
+
</ul>
</div>
@@ -103,7 +103,7 @@ limitations under the License.
</div>
</div>
-
+
<div class="index-right">
<div class="block" id="system">
@@ -256,6 +256,49 @@ limitations under the License.
</div>
</div>
+ <div id="gpu" ng-show="gpuAvailable">
+ <h2><span>GPU</span>
+ <small class="bar-desc">{{gpuCount}} detected</small>
+ </h2>
+
+ <div class="content clearfix">
+ <ul class="data">
+ <li class="gpu-name"><dl class="clearfix">
+ <dt><span>Name</span></dt>
+ <dd>{{gpuName}}</dd>
+ </dl></li>
+
+ <li class="gpu-compute"><dl class="clearfix">
+ <dt><span>Compute</span></dt>
+ <dd>{{gpuCompute}}</dd>
+ </dl></li>
+
+ </ul>
+
+ <div ng-repeat="gpu in gpuDevices"
class="gpu-memory-bar-container">
+ <!-- Memory bar for active GPU -->
+ <div class="gpu-memory-bar" ng-show="gpu.active &&
gpu.totalMemory && gpu.usedMemory">
+ <p data-desc="gpu-memory-bar">{{gpu.name}} Memory
+ <small class="bar-desc">{{gpu.memoryPercentage}}</small>
+ </p>
+ <div class="bar-holder bar-lvl-2">
+ <div class="bar-max bar">
+ <span class="bar-max
val">{{gpu.totalMemoryDisplay}}</span>
+ <div class="bar-total bar" ng-style="{width:
gpu.memoryPercentage}">
+ <span class="bar-total val" title="raw:
{{gpu.usedMemory}}B">{{gpu.usedMemoryDisplay}}</span>
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <!-- Simple list entry for inactive GPUs -->
+ <div class="gpu-info" ng-show="!gpu.active">
+ <p>{{gpu.name}} (ID: {{gpu.id}}, Compute:
{{gpu.computeCapability}})</p>
+ </div>
+ </div>
+ </div>
+ </div>
+
</div>
</div>