This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d40b9fba49 Broker: add SLA-style per-query error metrics (#17457)
7d40b9fba49 is described below

commit 7d40b9fba49cccbe24680f5773c4ebc920632559
Author: Songqiao Su <[email protected]>
AuthorDate: Tue Jan 6 19:04:41 2026 -0800

    Broker: add SLA-style per-query error metrics (#17457)
---
 .../apache/pinot/common/metrics/BrokerMeter.java   |  6 +++++
 .../pinot/common/response/BrokerResponse.java      | 14 ++++++++++++
 .../apache/pinot/spi/exception/QueryErrorCode.java | 26 ++++++++++++++++++++++
 3 files changed, 46 insertions(+)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
index c0e730a1ca6..c782c70062a 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
@@ -285,6 +285,12 @@ public class BrokerMeter implements AbstractMetrics.Meter {
    */
   public static final BrokerMeter QUERY_RESPONSE_SIZE_BYTES = 
create("QUERY_RESPONSE_SIZE_BYTES", "bytes", true);
 
+  /**
+   * SLA-style per-query error classification metrics.
+   */
+  public static final BrokerMeter QUERY_CRITICAL_ERROR = 
create("QUERY_CRITICAL_ERROR", "queries", true);
+  public static final BrokerMeter QUERY_NON_CRITICAL_ERROR = 
create("QUERY_NON_CRITICAL_ERROR", "queries", true);
+
   private static final Map<QueryErrorCode, BrokerMeter> 
QUERY_ERROR_CODE_METER_MAP;
 
   // Iterate through all query error codes from QueryErrorCode.getAllValues() and create a metric for each
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
index 0672b7d8470..fc800024719 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
@@ -76,6 +76,8 @@ public interface BrokerResponse {
    * This method ensures we emit metrics for all queries that have exceptions with a one-to-one mapping.
    */
   default void emitBrokerResponseMetrics(BrokerMetrics brokerMetrics) {
+    boolean hasCriticalError = false;
+    boolean hasNonCriticalError = false;
     for (QueryProcessingException exception : this.getExceptions()) {
       QueryErrorCode queryErrorCode;
       try {
@@ -85,6 +87,18 @@ public interface BrokerResponse {
         queryErrorCode = QueryErrorCode.UNKNOWN;
       }
       
brokerMetrics.addMeteredGlobalValue(BrokerMeter.getQueryErrorMeter(queryErrorCode),
 1);
+      if (queryErrorCode.isCriticalError()) {
+        hasCriticalError = true;
+      } else {
+        hasNonCriticalError = true;
+      }
+    }
+    // Emit each SLA-style metric at most once per query; a query with both kinds of error emits both
+    if (hasCriticalError) {
+      brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_CRITICAL_ERROR, 1);
+    }
+    if (hasNonCriticalError) {
+      
brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_NON_CRITICAL_ERROR, 1);
     }
   }
 
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
index 1975d7ce976..b7fac4e03fd 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.spi.exception;
 
 import java.util.EnumMap;
+import java.util.EnumSet;
 import java.util.Map;
 import javax.annotation.Nonnegative;
 import javax.ws.rs.core.Response;
@@ -68,6 +69,23 @@ public enum QueryErrorCode {
 
   private static final QueryErrorCode[] BY_ID;
 
+  // Static set of SLA-critical (system) error codes
+  private static final EnumSet<QueryErrorCode> CRITICAL_ERROR_CODES = 
EnumSet.of(
+      SQL_RUNTIME,
+      INTERNAL,
+      QUERY_SCHEDULING_TIMEOUT,
+      EXECUTION_TIMEOUT,
+      BROKER_TIMEOUT,
+      SERVER_SEGMENT_MISSING,
+      BROKER_SEGMENT_UNAVAILABLE,
+      SERVER_NOT_RESPONDING,
+      BROKER_REQUEST_SEND,
+      MERGE_RESPONSE,
+      QUERY_CANCELLATION,
+      SERVER_SHUTTING_DOWN,
+      QUERY_PLANNING
+  );
+
   static {
     int maxId = -1;
     for (QueryErrorCode queryErrorCode : QueryErrorCode.values()) {
@@ -173,4 +191,12 @@ public enum QueryErrorCode {
         return false;
     }
   }
+
+  /**
+   * Returns true if the error is considered critical for SLA accounting.
+   * Critical errors represent system-side failures (timeouts, internal errors, infra issues, etc.).
+   */
+  public boolean isCriticalError() {
+    return CRITICAL_ERROR_CODES.contains(this);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to