This is an automated email from the ASF dual-hosted git repository. wankai123 pushed a commit to branch fix-alarm-rule in repository https://gitbox.apache.org/repos/asf/skywalking-horizon-ui.git
commit 1e2022f63e68eff3bbf229133d7108b00a2117de Author: wankai123 <[email protected]> AuthorDate: Mon Jun 1 15:52:46 2026 +0800 Fix Alerting rules running context doesn't show on which OAP node and the running details. --- CHANGELOG.md | 24 ++ apps/bff/src/http/admin/alarm-rules.ts | 72 ++++ apps/bff/src/rbac/route-policy.ts | 1 + apps/ui/src/api/client.ts | 36 ++ apps/ui/src/api/scopes/alarms.ts | 12 + .../operate/alerting-rules/AlertingRulesView.vue | 368 ++++++++++++++++++++- apps/ui/src/i18n/locales/en.json | 11 + packages/api-client/src/alarm-status.ts | 73 +++- packages/api-client/src/index.ts | 4 + 9 files changed, 585 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 211d569..766784d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -189,6 +189,30 @@ sync-status banners count source rows only. picker lists the canonical English bundled dashboards once each, and the preview renders the English source as the baseline. +### Alerting rules — running entities show their OAP node + +The **Operate › Alerting rules** detail pane's **Currently watching** +list now spans the whole cluster and tags each entity with the OAP +node evaluating it. Each OAP instance evaluates a rule independently +over the slice of entities it holds, so the watched set is the union +across nodes — the page previously showed only the first responding +node's entities, which misread as "these are all the entities the rule +watches." The list now aggregates every instance's entities and labels +each row with its node (e.g. `SERVICE agent::app NODE 10.116.3.26_11800`), +with the per-entity alarm message on hover. The per-node load-state +table is unchanged. Single-instance deployments simply show one node +label per row. + +Clicking a watched entity now opens a **running-context popup** — the +live evaluation window the rule is computing for that entity, per OAP +node. 
It shows the current state (`FIRING` / `SILENCED_FIRING` / +`RECOVERY_OBSERVATION`), the window size and silence / recovery +countdowns, the window end, the last-alarm time and message, and the +per-metric snapshot the expression was evaluated against — rendered as +a sparkline plus per-bucket values so an operator can see exactly why a +rule is (or isn't) firing. Nodes not evaluating the entity are marked +as such, and a raw-JSON disclosure carries the full payload. + ### Live debugger fixes A clutch of small but visible bugs were caught while exercising the diff --git a/apps/bff/src/http/admin/alarm-rules.ts b/apps/bff/src/http/admin/alarm-rules.ts index cb98409..2967f4c 100644 --- a/apps/bff/src/http/admin/alarm-rules.ts +++ b/apps/bff/src/http/admin/alarm-rules.ts @@ -24,6 +24,10 @@ * round-trip per rule, runs * in parallel). * GET /api/admin/alarm-rules/:id — full detail for one rule. + * GET /api/admin/alarm-rules/:id/context?entity=… + * — running window for one entity + * (the metric snapshot the rule + * is evaluating right now). * * Read-only. The OAP alarm-rule lifecycle is "edit the YAML, restart * (or let the watcher pick up the change)"; no mutation surface @@ -38,6 +42,7 @@ import type { FastifyInstance, FastifyReply, FastifyRequest } from 'fastify'; import type { AlarmRuleDetail, + AlarmRunningContext, AlarmStatusClient, ClusterAlarmStatus, FetchLike, @@ -109,6 +114,26 @@ export interface AlertingRuleDetailResponse { }>; } +export interface AlertingRuleContextNode { + address: string; + ok: boolean; + error?: string; + /** Running window for this entity on this node. Null on a node that + * isn't evaluating the entity (or that failed). */ + context: AlarmRunningContext | null; +} + +export interface AlertingRuleContextResponse { + ruleId: string; + entityName: string; + generatedAt: number; + reachable: boolean; + error?: string; + /** Per-node running context. Only the node evaluating the entity + * carries a populated body; the rest are stubs. 
*/ + nodes: AlertingRuleContextNode[]; +} + /* Pivot a per-rule x per-node matrix from the two-step fan-out. */ function pivot( listResp: ClusterAlarmStatus<{ ruleList: Array<{ id: string }> }>, @@ -269,4 +294,51 @@ export function registerAlarmRulesRoutes( return reply.send(body); }, ); + + // ── GET /api/admin/alarm-rules/:id/context?entity=… ─────────────── + // `entity` rides as a query param (not a path segment) because entity + // names carry `::` and may carry `/` (endpoint scope) — path-segment + // encoding of those is a portability minefield across proxies. + app.get( + '/api/admin/alarm-rules/:id/context', + { preHandler: auth }, + async (req: FastifyRequest, reply: FastifyReply) => { + const id = (req.params as { id?: string }).id; + const entity = (req.query as { entity?: string }).entity; + if (!id) return reply.code(400).send({ error: 'missing_id' }); + if (!entity) return reply.code(400).send({ error: 'missing_entity' }); + const c = client(); + let env: ClusterAlarmStatus<AlarmRunningContext>; + try { + env = await c.ruleContext(id, entity); + } catch (err) { + const status = + err instanceof AlarmStatusApiError && err.status === 404 ? 404 : 502; + return reply.code(status).send({ + ruleId: id, + entityName: entity, + generatedAt: Date.now(), + reachable: false, + error: err instanceof Error ? err.message : String(err), + nodes: [], + } satisfies AlertingRuleContextResponse); + } + const nodes = env.oapInstances.map<AlertingRuleContextNode>( + (i: InstanceAlarmStatus<AlarmRunningContext>) => ({ + address: i.address, + ok: !i.errorMsg && !!i.status, + error: i.errorMsg ?? undefined, + context: i.status ?? 
null, + }), + ); + const body: AlertingRuleContextResponse = { + ruleId: id, + entityName: entity, + generatedAt: Date.now(), + reachable: nodes.some((n) => n.ok), + nodes, + }; + return reply.send(body); + }, + ); } diff --git a/apps/bff/src/rbac/route-policy.ts b/apps/bff/src/rbac/route-policy.ts index 7614180..0383d0a 100644 --- a/apps/bff/src/rbac/route-policy.ts +++ b/apps/bff/src/rbac/route-policy.ts @@ -191,6 +191,7 @@ export const ROUTE_POLICY: Record<string, RoutePolicy> = { // ── Alarm-rule catalog (admin read-only) ───────────────────────── 'GET /api/admin/alarm-rules': 'alarm-rule:read', 'GET /api/admin/alarm-rules/:id': 'alarm-rule:read', + 'GET /api/admin/alarm-rules/:id/context': 'alarm-rule:read', // ── Overview-template editor (admin) ───────────────────────────── // The admin editor is an operate-only surface — even reading the diff --git a/apps/ui/src/api/client.ts b/apps/ui/src/api/client.ts index 463606d..03198a2 100644 --- a/apps/ui/src/api/client.ts +++ b/apps/ui/src/api/client.ts @@ -546,6 +546,42 @@ export interface AlertingRuleDetailResponse { detail: AlarmRuleDetail | null; nodes: Array<{ address: string; ok: boolean; error?: string; detail: AlarmRuleDetail | null }>; } +/** Per-entity running window from `/status/alarm/{ruleId}/{entityName}`. + * Only the node evaluating the entity returns a populated body; other + * nodes return a stub and omit the evaluation-only fields. Mirrors the + * BFF's `AlarmRunningContext`. */ +export interface AlarmRunningContext { + ruleId: string; + expression: string; + endTime?: string; + additionalPeriod: number; + size: number; + silencePeriod?: number; + recoveryObservationPeriod?: number; + silenceCountdown: number; + recoveryObservationCountdown: number; + currentState?: string; + entityName?: string; + windowValues: Array<{ index: number; metrics: Array<{ name: string; timeBucket: number; value: string }> }>; + /** Metric name → JSON-encoded MQE series array. 
*/ + mqeMetricsSnapshot?: Record<string, string>; + lastAlarmTime: number | string; + lastAlarmMessage?: string; + lastAlarmMqeMetricsSnapshot?: Record<string, string>; +} +/** One series inside a parsed `mqeMetricsSnapshot` value. */ +export interface AlarmMqeSnapshotSeries { + metric: { labels: Array<{ key: string; value: string }> }; + values: Array<{ id: string; doubleValue: number; isEmptyValue: boolean }>; +} +export interface AlertingRuleContextResponse { + ruleId: string; + entityName: string; + generatedAt: number; + reachable: boolean; + error?: string; + nodes: Array<{ address: string; ok: boolean; error?: string; context: AlarmRunningContext | null }>; +} /** Allowed values for `AlarmsConfig.defaultWindowMs`, in ms. Matches * the alarms page's preset list so the admin's choice always * corresponds to a real tab. */ diff --git a/apps/ui/src/api/scopes/alarms.ts b/apps/ui/src/api/scopes/alarms.ts index 13c5f1f..cb8e2dc 100644 --- a/apps/ui/src/api/scopes/alarms.ts +++ b/apps/ui/src/api/scopes/alarms.ts @@ -20,6 +20,7 @@ import type { AlarmsCountResponse, AlarmsQuery, AlarmsResponse, + AlertingRuleContextResponse, AlertingRuleDetailResponse, AlertingRulesListResponse, BffClient, @@ -87,4 +88,15 @@ export class AlarmsApi { `/api/admin/alarm-rules/${encodeURIComponent(id)}`, ); } + + /** Per-entity running window — the metric snapshot the rule is + * evaluating for one entity right now, per OAP node. Drives the + * alerting-rules row-click popup. 
*/ + adminRuleContext(id: string, entityName: string): Promise<AlertingRuleContextResponse> { + const p = new URLSearchParams({ entity: entityName }); + return this.bff.request<AlertingRuleContextResponse>( + 'GET', + `/api/admin/alarm-rules/${encodeURIComponent(id)}/context?${p.toString()}`, + ); + } } diff --git a/apps/ui/src/features/operate/alerting-rules/AlertingRulesView.vue b/apps/ui/src/features/operate/alerting-rules/AlertingRulesView.vue index ad79011..d09bd14 100644 --- a/apps/ui/src/features/operate/alerting-rules/AlertingRulesView.vue +++ b/apps/ui/src/features/operate/alerting-rules/AlertingRulesView.vue @@ -26,8 +26,9 @@ │ service_resp_time_rule │ rule body (period, silence, │ │ bundled · loaded 3/3 │ recovery, hooks, metrics) │ │ jvm_old_gen_rule │ trigger expression │ - │ bundled · loaded 3/3 │ per-OAP-node load state │ - │ … │ running entities │ + │ bundled · loaded 3/3 │ running entities, each tagged │ + │ … │ with the OAP node watching it │ + │ │ per-OAP-node load state │ └─────────────────────────┴────────────────────────────────┘ Read-only by design — alarm-rule edits go through the YAML file + @@ -39,11 +40,22 @@ import { computed, ref, watch } from 'vue'; import { useRoute, useRouter } from 'vue-router'; import { useI18n } from 'vue-i18n'; import { useQuery } from '@tanstack/vue-query'; -import { bff, type AlertingRuleSummary } from '@/api/client'; +import { + bff, + type AlertingRuleSummary, + type AlarmRunningContext, + type AlarmMqeSnapshotSeries, +} from '@/api/client'; +import Modal from '@/features/operate/_shared/Modal.vue'; +import Sparkline from '@/components/charts/Sparkline.vue'; +import { useOapInfo } from '@/shell/useOapInfo'; const { t } = useI18n(); const route = useRoute(); const router = useRouter(); +/* OAP timezone offset (minutes east of UTC) — used to re-anchor the + * server-local window/bucket times to a real instant before display. 
*/ +const { timezone } = useOapInfo(); const listQuery = useQuery({ queryKey: ['operate/alerting-rules'], @@ -99,6 +111,121 @@ const detailQuery = useQuery({ const detail = computed(() => detailQuery.data.value?.detail ?? selectedSummary.value?.detail ?? null); const detailNodes = computed(() => detailQuery.data.value?.nodes ?? []); + +/* Each OAP instance evaluates the rule over the slice of entities it + * holds, so the watched set is the UNION across nodes — and the node + * is load-bearing, not noise (the same rule watches different entities + * on different instances). Flatten the per-node detail into one list, + * tagging each entity with the instance watching it. Until the per-node + * fetch lands we fall back to the summary's best-node entities (no node + * label yet) so the section doesn't blink empty. */ +const watching = computed(() => { + if (detailNodes.value.length > 0) { + return detailNodes.value.flatMap((n) => + (n.detail?.runningEntities ?? []).map((re) => ({ + scope: re.scope, + name: re.name, + message: re.formattedMessage, + node: n.address, + })), + ); + } + return (detail.value?.runningEntities ?? []).map((re) => ({ + scope: re.scope, + name: re.name, + message: re.formattedMessage, + node: '', + })); +}); + +/* Row-click popup: the rule's live running window for ONE entity, + * fetched on demand from /status/alarm/{ruleId}/{entityName}. The + * endpoint answers per-node, but only the node evaluating the entity + * returns a populated body; the rest are stubs we render compactly. 
*/ +const selectedEntity = ref<{ scope: string; name: string } | null>(null); +const contextQuery = useQuery({ + queryKey: computed(() => ['operate/alerting-rule-context', selectedId.value, selectedEntity.value?.name]), + queryFn: () => bff.alarms.adminRuleContext(selectedId.value, selectedEntity.value!.name), + enabled: computed(() => selectedId.value.length > 0 && selectedEntity.value !== null), + staleTime: 10_000, +}); +const contextNodes = computed(() => contextQuery.data.value?.nodes ?? []); +const contextTitle = computed(() => + selectedEntity.value ? `${selectedId.value} · ${selectedEntity.value.name}` : selectedId.value, +); +const contextError = computed(() => { + const e = contextQuery.error.value; + return e instanceof Error ? e.message : e ? String(e) : ''; +}); + +function openEntity(scope: string, name: string): void { + selectedEntity.value = { scope, name }; +} + +/* A node carries a populated body only while it's actually evaluating + * the entity; OAP omits state/window on the other nodes. */ +function isEvaluating(ctx: AlarmRunningContext | null): ctx is AlarmRunningContext { + return !!ctx && (ctx.size > 0 || !!ctx.currentState || ctx.windowValues.length > 0); +} +function stateClass(state?: string): string { + if (!state) return ''; + if (state.includes('FIRING')) return state.includes('SILENCED') ? 'is-warn' : 'is-fire'; + if (state.includes('RECOVERY')) return 'is-recov'; + return ''; +} +function fmtLastAlarm(ts: number | string): string { + const n = typeof ts === 'string' ? Number(ts) : ts; + if (!n || Number.isNaN(n)) return '—'; + return new Date(n).toLocaleString(); +} +/* OAP emits window/bucket times in the SERVER's local wall-clock, with + * no zone marker — so they must be re-anchored to a real instant via the + * server's UTC offset, then rendered in the BROWSER's local zone. That + * keeps them on the same clock as the epoch-derived "last alarm" time and + * the rest of the UI. 
Offset unknown (server unreachable) → fall back to + * the raw server wall-clock rather than guessing. */ +function pad2(n: number): string { + return String(n).padStart(2, '0'); +} +function oapPartsToEpoch(y: number, mo: number, d: number, h: number, mi: number, s: number): number | null { + const tz = timezone.value; + if (tz === undefined || tz === null) return null; + return Date.UTC(y, mo - 1, d, h, mi, s) - tz * 60_000; +} +/* Metric bucket ids are zero-padded YYYYMMDDHH(mm)(ss). The alarm window + * is minute-granular, so render HH:mm in the browser zone. */ +function fmtBucketTime(id: string): string { + const y = +id.slice(0, 4); + const mo = +id.slice(4, 6); + const d = +id.slice(6, 8); + const h = id.length >= 10 ? +id.slice(8, 10) : 0; + const mi = id.length >= 12 ? +id.slice(10, 12) : 0; + const s = id.length >= 14 ? +id.slice(12, 14) : 0; + const e = oapPartsToEpoch(y, mo, d, h, mi, s); + if (e === null) return id.length >= 12 ? `${pad2(h)}:${pad2(mi)}` : id; + const dd = new Date(e); + return `${pad2(dd.getHours())}:${pad2(dd.getMinutes())}`; +} +/* `endTime` is an OAP-server-local datetime string (`2026-06-01T06:42:00.000`, + * no zone marker). Convert to browser-local; pass through unparseable input. */ +function fmtEndTime(s?: string): string { + if (!s) return '—'; + const m = /^(\d{4})-(\d{2})-(\d{2})[T ](\d{2}):(\d{2})(?::(\d{2}))?/.exec(s); + if (!m) return s; + const e = oapPartsToEpoch(+m[1], +m[2], +m[3], +m[4], +m[5], +(m[6] ?? 0)); + return e === null ? s : new Date(e).toLocaleString(); +} +function parseSnapshot(json: string): AlarmMqeSnapshotSeries[] { + try { + const v = JSON.parse(json) as unknown; + return Array.isArray(v) ? (v as AlarmMqeSnapshotSeries[]) : []; + } catch { + return []; + } +} +function sparkValues(series: AlarmMqeSnapshotSeries): Array<number | null> { + return series.values.map((v) => (v.isEmptyValue ? 
null : v.doubleValue)); +} </script> <template> @@ -250,14 +377,28 @@ const detailNodes = computed(() => detailQuery.data.value?.nodes ?? []); </div> </section> - <section v-if="detail.runningEntities.length > 0" class="ar__sec"> + <section v-if="watching.length > 0" class="ar__sec"> <div class="ar__kicker-s"> - {{ t('Currently watching ({n})', { n: detail.runningEntities.length }) }} + {{ t('Currently watching ({n})', { n: watching.length }) }} </div> <ul class="ar__entity-list"> - <li v-for="re in detail.runningEntities" :key="`${re.scope}/${re.name}`"> + <li + v-for="re in watching" + :key="`${re.node}/${re.scope}/${re.name}`" + class="ar__entity-row" + role="button" + tabindex="0" + :title="t('Show running context for {name}', { name: re.name })" + @click="openEntity(re.scope, re.name)" + @keydown.enter.prevent="openEntity(re.scope, re.name)" + @keydown.space.prevent="openEntity(re.scope, re.name)" + > <span class="ar__tag">{{ re.scope }}</span> <code>{{ re.name }}</code> + <span v-if="re.node" class="ar__entity-node"> + <span class="ar__entity-node-lbl">{{ t('node') }}</span> + <code>{{ re.node }}</code> + </span> </li> </ul> </section> @@ -289,6 +430,84 @@ const detailNodes = computed(() => detailQuery.data.value?.nodes ?? 
[]); </aside> </div> </template> + + <Modal + :open="selectedEntity !== null" + :title="contextTitle" + width="660px" + @close="selectedEntity = null" + > + <div class="arc"> + <pre v-if="detail" class="ar__expr arc__expr">{{ detail.expression }}</pre> + + <div v-if="contextQuery.isPending.value" class="arc__msg">{{ t('Reading running context…') }}</div> + <div v-else-if="contextQuery.isError.value" class="arc__msg arc__msg--err"> + {{ t('Running context unavailable.') }} <code>{{ contextError }}</code> + </div> + <div v-else-if="contextNodes.length === 0" class="arc__msg"> + {{ t('No running context returned for this entity.') }} + </div> + <template v-else> + <div v-for="n in contextNodes" :key="n.address" class="arc__node"> + <div class="arc__node-head"> + <span class="ar__dot" :class="n.ok ? 'is-ok' : 'is-err'" /> + <code class="ar__inst-addr">{{ n.address }}</code> + <span + v-if="n.context?.currentState" + class="arc__state" + :class="stateClass(n.context.currentState)" + >{{ n.context.currentState }}</span> + </div> + + <div v-if="n.error" class="arc__msg arc__msg--err">{{ n.error }}</div> + <div v-else-if="!isEvaluating(n.context)" class="arc__msg"> + {{ t('Not evaluated on this instance.') }} + </div> + <template v-else> + <div class="ar__meta-grid arc__grid"> + <div><span class="ar__lbl">{{ t('window') }}</span><span>{{ t('{n}m', { n: n.context?.size }) }}</span></div> + <div><span class="ar__lbl">{{ t('silence left') }}</span><span>{{ n.context?.silenceCountdown }}</span></div> + <div><span class="ar__lbl">{{ t('recovery left') }}</span><span>{{ n.context?.recoveryObservationCountdown }}</span></div> + <div v-if="n.context?.endTime"><span class="ar__lbl">{{ t('window end') }}</span><span>{{ fmtEndTime(n.context.endTime) }}</span></div> + </div> + <div class="arc__last"> + <span class="ar__lbl">{{ t('last alarm') }}</span> + <span class="arc__last-t">{{ fmtLastAlarm(n.context?.lastAlarmTime ?? 
0) }}</span> + <span v-if="n.context?.lastAlarmMessage" class="arc__last-msg">{{ n.context.lastAlarmMessage }}</span> + </div> + + <div + v-for="(json, metric) in (n.context?.mqeMetricsSnapshot ?? {})" + :key="metric" + class="arc__metric" + > + <div class="arc__metric-head"><code>{{ metric }}</code></div> + <div v-for="(series, si) in parseSnapshot(json)" :key="si" class="arc__series"> + <Sparkline :values="sparkValues(series)" :width="280" :height="38" fluid :stroke="1.5" class="arc__spark" /> + <div class="arc__axis"> + <div + v-for="(v, vi) in series.values" + :key="v.id" + class="arc__tick" + :class="{ 'is-empty': v.isEmptyValue }" + :style="{ left: series.values.length > 1 ? (vi / (series.values.length - 1)) * 100 + '%' : '50%' }" + > + <span class="arc__tick-v">{{ v.isEmptyValue ? '—' : v.doubleValue }}</span> + <span class="arc__tick-t">{{ fmtBucketTime(v.id) }}</span> + </div> + </div> + </div> + </div> + </template> + </div> + </template> + + <details class="arc__raw"> + <summary>{{ t('raw context') }}</summary> + <pre>{{ JSON.stringify(contextQuery.data.value ?? {}, null, 2) }}</pre> + </details> + </div> + </Modal> </div> </template> @@ -564,6 +783,15 @@ const detailNodes = computed(() => detailQuery.data.value?.nodes ?? []); align-items: center; gap: 6px; } +.ar__entity-row { + cursor: pointer; + padding: 2px 4px; + margin: 0 -4px; + border-radius: 4px; + outline: none; +} +.ar__entity-row:hover { background: var(--sw-bg-2); } +.ar__entity-row:focus-visible { box-shadow: inset 0 0 0 1px var(--sw-accent); } .ar__entity-list code { font-family: var(--sw-mono); font-size: var(--sw-fs-sm); @@ -572,6 +800,32 @@ const detailNodes = computed(() => detailQuery.data.value?.nodes ?? []); padding: 1px 5px; border-radius: 3px; } +/* Per-entity node tag — which OAP instance is evaluating this entity. + * Right-aligned so the scope + entity name read as the primary column + * and the node reads as a trailing annotation. 
*/ +.ar__entity-node { + margin-left: auto; + display: inline-flex; + align-items: center; + gap: 5px; + flex-shrink: 0; +} +.ar__entity-node-lbl { + font-size: var(--sw-fs-xs); + font-weight: var(--sw-fw-bold); + text-transform: uppercase; + letter-spacing: var(--sw-ls-caps); + color: var(--sw-fg-3); +} +.ar__entity-node code { + font-family: var(--sw-mono); + font-size: var(--sw-fs-xs); + color: var(--sw-fg-1); + background: var(--sw-bg-2); + border: 1px solid var(--sw-line); + padding: 1px 5px; + border-radius: 3px; +} .ar__node-table { width: 100%; border-collapse: collapse; @@ -605,4 +859,106 @@ const detailNodes = computed(() => detailQuery.data.value?.nodes ?? []); } .ar__dot.is-ok { background: var(--sw-ok); } .ar__dot.is-err { background: var(--sw-err); } + +/* ── Running-context popup ──────────────────────────────────────── */ +.arc { display: flex; flex-direction: column; gap: 14px; } +.arc__expr { margin: 0; } +.arc__msg { + font-size: var(--sw-fs-sm); + color: var(--sw-fg-3); + font-style: italic; +} +.arc__msg--err { color: var(--sw-err); font-style: normal; } +.arc__msg code { + font-family: var(--sw-mono); + font-style: normal; + font-size: var(--sw-fs-xs); + color: var(--sw-fg-1); + background: var(--sw-bg-2); + padding: 1px 5px; + border-radius: 3px; +} +.arc__node { + border: 1px solid var(--sw-line); + border-radius: 6px; + padding: 10px 12px; + background: var(--sw-bg-2); + display: flex; + flex-direction: column; + gap: 10px; +} +.arc__node-head { display: flex; align-items: center; gap: 8px; } +.arc__state { + margin-left: auto; + font-size: var(--sw-fs-xs); + font-weight: var(--sw-fw-bold); + letter-spacing: var(--sw-ls-caps); + text-transform: uppercase; + padding: 1px 7px; + border-radius: 3px; + border: 1px solid var(--sw-line); + color: var(--sw-fg-2); +} +.arc__state.is-fire { color: var(--sw-err); border-color: var(--sw-err); } +.arc__state.is-warn { color: var(--sw-warn); border-color: var(--sw-warn); } +.arc__state.is-recov { color: 
var(--sw-accent); border-color: var(--sw-accent); } +.arc__grid { grid-template-columns: repeat(2, 1fr); } +.arc__last { + display: flex; + align-items: baseline; + gap: 8px; + flex-wrap: wrap; + font-size: var(--sw-fs-sm); +} +.arc__last-t { font-variant-numeric: tabular-nums; color: var(--sw-fg-0); } +.arc__last-msg { color: var(--sw-fg-2); font-style: italic; } +.arc__metric { display: flex; flex-direction: column; gap: 6px; } +.arc__metric-head code { + font-family: var(--sw-mono); + font-size: var(--sw-fs-sm); + color: var(--sw-fg-0); + background: var(--sw-bg-1); + padding: 1px 5px; + border-radius: 3px; +} +/* Sparkline + value/time axis share one padded content box so the + * axis ticks (positioned at i/(n-1) of the box width, centered) sit + * directly under the line's points. The inline padding leaves room for + * the half-width overhang of the first/last centered ticks. */ +.arc__series { position: relative; padding: 0 24px; } +.arc__spark { display: block; width: 100%; } +.arc__axis { position: relative; height: 32px; margin-top: 3px; } +.arc__tick { + position: absolute; + top: 0; + transform: translateX(-50%); + display: flex; + flex-direction: column; + align-items: center; + gap: 1px; + white-space: nowrap; +} +.arc__tick.is-empty { opacity: 0.45; } +.arc__tick-v { font-size: var(--sw-fs-sm); color: var(--sw-fg-0); font-variant-numeric: tabular-nums; } +.arc__tick-t { font-size: var(--sw-fs-xs); color: var(--sw-fg-3); font-variant-numeric: tabular-nums; } +.arc__raw { font-size: var(--sw-fs-xs); } +.arc__raw summary { + cursor: pointer; + color: var(--sw-fg-3); + text-transform: uppercase; + letter-spacing: var(--sw-ls-caps); + font-weight: var(--sw-fw-bold); +} +.arc__raw pre { + margin: 8px 0 0; + max-height: 240px; + overflow: auto; + font-family: var(--sw-mono); + font-size: var(--sw-fs-xs); + color: var(--sw-fg-1); + background: var(--sw-bg-2); + border: 1px solid var(--sw-line); + border-radius: 5px; + padding: 8px 10px; +} </style> diff --git 
a/apps/ui/src/i18n/locales/en.json b/apps/ui/src/i18n/locales/en.json index add6647..236a926 100644 --- a/apps/ui/src/i18n/locales/en.json +++ b/apps/ui/src/i18n/locales/en.json @@ -650,6 +650,17 @@ "NONE — this user would be rejected at login (no matching mapping)": "NONE — this user would be rejected at login (no matching mapping)", "No pinned layers. Add one from the palette below.": "No pinned layers. Add one from the palette below.", "No rules match.": "No rules match.", + "No running context returned for this entity.": "No running context returned for this entity.", + "Not evaluated on this instance.": "Not evaluated on this instance.", + "Reading running context…": "Reading running context…", + "Running context unavailable.": "Running context unavailable.", + "Show running context for {name}": "Show running context for {name}", + "last alarm": "last alarm", + "raw context": "raw context", + "recovery left": "recovery left", + "silence left": "silence left", + "window": "window", + "window end": "window end", "No user entry found for": "No user entry found for", "No users match the current filter.": "No users match the current filter.", "Note": "Note", diff --git a/packages/api-client/src/alarm-status.ts b/packages/api-client/src/alarm-status.ts index db77730..c66e472 100644 --- a/packages/api-client/src/alarm-status.ts +++ b/packages/api-client/src/alarm-status.ts @@ -96,17 +96,70 @@ export interface AlarmRuleDetail { includeMetrics: string[]; } -/** Returned by `/status/alarm/{ruleId}/{entityName}` — per-entity - * running window state. Used for the "what's the rule currently - * seeing for this entity?" pane. */ +/** One raw metric reading inside a `windowValues` bucket. `value` is a + * string on the wire (OAP serialises the metric's stored value as-is). */ +export interface AlarmWindowMetric { + name: string; + timeBucket: number; + value: string; +} + +/** One bucket of the rule's sliding evaluation window. 
`index` runs + * 0..size-1; `metrics` is empty for buckets that received no data. */ +export interface AlarmWindowBucket { + index: number; + metrics: AlarmWindowMetric[]; +} + +/** One value point in a parsed MQE snapshot series. */ +export interface AlarmMqeSnapshotValue { + id: string; + doubleValue: number; + isEmptyValue: boolean; +} + +/** A single MQE series as produced by the alarm checker. Lives inside + * the `mqeMetricsSnapshot` map JSON-encoded per metric — callers parse + * the string value into `AlarmMqeSnapshotSeries[]`. */ +export interface AlarmMqeSnapshotSeries { + metric: { labels: Array<{ key: string; value: string }> }; + values: AlarmMqeSnapshotValue[]; +} + +/** Returned by `/status/alarm/{ruleId}/{entityName}` — the rule's + * running window state for ONE entity, per OAP node. Only the node + * actually evaluating the entity returns a populated body; other nodes + * return a stub (`size: 0`, empty `windowValues`, `lastAlarmTime: 0`) + * and OMIT the evaluation-only fields (`currentState`, `entityName`, + * `mqeMetricsSnapshot`, …) — hence the optionals. */ export interface AlarmRunningContext { - ruleName: string; - entity: string; - /** Sliding window snapshot — bucket-per-metric values currently in - * the rule's evaluation window. Shape varies by rule; the UI - * renders as raw JSON for now. */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - [key: string]: any; + ruleId: string; + expression: string; + /** Window end as an OAP-server-local datetime string. Absent on a + * node that isn't evaluating this entity. */ + endTime?: string; + additionalPeriod: number; + /** Window size = `period + additionalPeriod`. `0` on a non-evaluating + * node. */ + size: number; + silencePeriod?: number; + recoveryObservationPeriod?: number; + /** Silence countdown; `-1` means not running. */ + silenceCountdown: number; + recoveryObservationCountdown: number; + /** e.g. `FIRING` / `SILENCED_FIRING` / `RECOVERY_OBSERVATION`. 
Absent + * when this node isn't evaluating the entity. */ + currentState?: string; + entityName?: string; + windowValues: AlarmWindowBucket[]; + /** Metric name → JSON-encoded `AlarmMqeSnapshotSeries[]` (the data the + * expression was evaluated against this tick). */ + mqeMetricsSnapshot?: Record<string, string>; + /** Epoch-ms of the last fire; `0` once recovered. Wire type is loose + * (number or numeric string), so callers coerce. */ + lastAlarmTime: number | string; + lastAlarmMessage?: string; + lastAlarmMqeMetricsSnapshot?: Record<string, string>; } export class AlarmStatusApiError extends Error { diff --git a/packages/api-client/src/index.ts b/packages/api-client/src/index.ts index dbb8651..6b06961 100644 --- a/packages/api-client/src/index.ts +++ b/packages/api-client/src/index.ts @@ -210,6 +210,10 @@ export { type AlarmRuleList, type AlarmRuleDetail, type AlarmRunningContext, + type AlarmWindowBucket, + type AlarmWindowMetric, + type AlarmMqeSnapshotSeries, + type AlarmMqeSnapshotValue, type ClusterAlarmStatus, type InstanceAlarmStatus, } from './alarm-status.js';
