This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 7d40b9fba49 Broker: add SLA-style per-query error metrics (#17457)
7d40b9fba49 is described below
commit 7d40b9fba49cccbe24680f5773c4ebc920632559
Author: Songqiao Su <[email protected]>
AuthorDate: Tue Jan 6 19:04:41 2026 -0800
Broker: add SLA-style per-query error metrics (#17457)
---
.../apache/pinot/common/metrics/BrokerMeter.java | 6 +++++
.../pinot/common/response/BrokerResponse.java | 14 ++++++++++++
.../apache/pinot/spi/exception/QueryErrorCode.java | 26 ++++++++++++++++++++++
3 files changed, 46 insertions(+)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
index c0e730a1ca6..c782c70062a 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java
@@ -285,6 +285,12 @@ public class BrokerMeter implements AbstractMetrics.Meter {
*/
public static final BrokerMeter QUERY_RESPONSE_SIZE_BYTES =
create("QUERY_RESPONSE_SIZE_BYTES", "bytes", true);
+ /**
+ * SLA-style per-query error classification metrics.
+ */
+ public static final BrokerMeter QUERY_CRITICAL_ERROR =
create("QUERY_CRITICAL_ERROR", "queries", true);
+ public static final BrokerMeter QUERY_NON_CRITICAL_ERROR =
create("QUERY_NON_CRITICAL_ERROR", "queries", true);
+
private static final Map<QueryErrorCode, BrokerMeter>
QUERY_ERROR_CODE_METER_MAP;
// Iterate through all query error codes from QueryErrorCode.getAllValues() and create a metric for each
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
b/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
index 0672b7d8470..fc800024719 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/response/BrokerResponse.java
@@ -76,6 +76,8 @@ public interface BrokerResponse {
* This method ensures we emit metrics for all queries that have exceptions with a one-to-one mapping.
*/
default void emitBrokerResponseMetrics(BrokerMetrics brokerMetrics) {
+ boolean hasCriticalError = false;
+ boolean hasNonCriticalError = false;
for (QueryProcessingException exception : this.getExceptions()) {
QueryErrorCode queryErrorCode;
try {
@@ -85,6 +87,18 @@ public interface BrokerResponse {
queryErrorCode = QueryErrorCode.UNKNOWN;
}
brokerMetrics.addMeteredGlobalValue(BrokerMeter.getQueryErrorMeter(queryErrorCode),
1);
+ if (queryErrorCode.isCriticalError()) {
+ hasCriticalError = true;
+ } else {
+ hasNonCriticalError = true;
+ }
+ }
+ // Emit each SLA-style metric at most once per query; a query with both critical and non-critical exceptions increments both
+ if (hasCriticalError) {
+ brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_CRITICAL_ERROR, 1);
+ }
+ if (hasNonCriticalError) {
+ brokerMetrics.addMeteredGlobalValue(BrokerMeter.QUERY_NON_CRITICAL_ERROR, 1);
}
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
index 1975d7ce976..b7fac4e03fd 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/exception/QueryErrorCode.java
@@ -19,6 +19,7 @@
package org.apache.pinot.spi.exception;
import java.util.EnumMap;
+import java.util.EnumSet;
import java.util.Map;
import javax.annotation.Nonnegative;
import javax.ws.rs.core.Response;
@@ -68,6 +69,23 @@ public enum QueryErrorCode {
private static final QueryErrorCode[] BY_ID;
+ // SLA-critical (system-side) error codes; isCriticalError() tests membership in this set
+ private static final EnumSet<QueryErrorCode> CRITICAL_ERROR_CODES = EnumSet.of(
+ SQL_RUNTIME,
+ INTERNAL,
+ QUERY_SCHEDULING_TIMEOUT,
+ EXECUTION_TIMEOUT,
+ BROKER_TIMEOUT,
+ SERVER_SEGMENT_MISSING,
+ BROKER_SEGMENT_UNAVAILABLE,
+ SERVER_NOT_RESPONDING,
+ BROKER_REQUEST_SEND,
+ MERGE_RESPONSE,
+ QUERY_CANCELLATION,
+ SERVER_SHUTTING_DOWN,
+ QUERY_PLANNING
+ );
+
static {
int maxId = -1;
for (QueryErrorCode queryErrorCode : QueryErrorCode.values()) {
@@ -173,4 +191,12 @@ public enum QueryErrorCode {
return false;
}
}
+
+ /**
+ * Returns true if this error code is in CRITICAL_ERROR_CODES, i.e. is considered critical for SLA accounting.
+ * Critical errors represent system-side failures (timeouts, internal errors, infra issues, etc.).
+ */
+ public boolean isCriticalError() {
+ return CRITICAL_ERROR_CODES.contains(this);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]