Re: [PR] feat(semantic-layers): containment-aware query cache [superset]

via GitHub Wed, 20 May 2026 01:08:20 -0700


bito-code-review[bot] commented on code in PR #40221:
URL: https://github.com/apache/superset/pull/40221#discussion_r3272237008



##########
superset-frontend/src/features/semanticLayers/jsonFormsHelpers.tsx:
##########
@@ -255,14 +255,96 @@ const EnumNamesRenderer = 
withJsonFormsControlProps(EnumNamesControl);
 const enumNamesEntry = {
   // Rank 5: higher than the default string renderer (2–3) so this fires
   // whenever x-enumNames is present, regardless of the underlying type.
+  // Array-of-enum schemas are handled by ``multiEnumEntry`` below — this
+  // renderer only targets scalar string/number controls.
   tester: rankWith(
     5,
+    and(
+      schemaMatches(s => {
+        const names = (s as Record<string, unknown>)['x-enumNames'];
+        return Array.isArray(names) && (names as unknown[]).length > 0;
+      }),
+      schemaMatches(s => (s as Record<string, unknown>)?.type !== 'array'),
+    ),
+  ),
+  renderer: EnumNamesRenderer,
+};
+
+/**
+ * Renderer for ``{type: 'array', items: {enum: [...]}}`` schemas.  Renders
+ * a single Antd Select with ``mode="multiple"`` (tag-style multi-select),
+ * matching the natural expectation of a "pick several from a list" control.
+ *
+ * Without this, the default ``PrimitiveArrayControl`` from the upstream
+ * library renders an "Add …" button that creates one single-select per
+ * element — visually wrong for an enum multi-select and unable to display
+ * ``items.x-enumNames`` labels.
+ *
+ * The renderer is dynamic-aware: when the host form is refreshing the
+ * schema (e.g. compatible options narrowing as the user picks), the Select
+ * shows a loading indicator without becoming disabled, so the user can
+ * continue editing while options refresh.
+ */
+function MultiEnumControl(props: ControlProps) {
+  const { refreshingSchema } = props.config ?? {};
+  const arraySchema = props.schema as Record<string, unknown>;
+  const itemsSchema =
+    (arraySchema.items as Record<string, unknown>) ??
+    ({} as Record<string, unknown>);
+
+  const enumValues = (itemsSchema.enum as unknown[]) ?? [];
+  const enumNames =
+    (itemsSchema['x-enumNames'] as string[]) ?? enumValues.map(String);
+
+  const options = enumValues.map((value, index) => ({
+    value: value as string | number,
+    label: enumNames[index] ?? String(value),
+  }));
+
+  const value = Array.isArray(props.data) ? (props.data as unknown[]) : [];
+
+  const tooltip = (props.uischema?.options as Record<string, unknown>)
+    ?.tooltip as string | undefined;
+
+  return (
+    <Form.Item label={props.label} tooltip={tooltip}>
+      <Select
+        mode="multiple"
+        value={value as (string | number)[]}
+        onChange={next => props.handleChange(props.path, next)}
+        options={options}
+        style={{ width: '100%' }}
+        disabled={!props.enabled}
+        loading={!!refreshingSchema}
+        allowClear
+        optionFilterProp="label"
+        placeholder={
+          (props.uischema?.options as Record<string, unknown>)
+            ?.placeholderText as string | undefined
+        }

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Missing translation wrapper</b></div>
   <div id="fix">
   
   Wrap the placeholderText from uischema options with the translation function 
`t()`, for example:
   ```jsx
   placeholder={
     t((props.uischema?.options as Record<string, unknown>)?.placeholderText as 
string)
   }
   ```
   </div>
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #983322</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



##########
superset-frontend/packages/superset-ui-core/src/components/CachedLabel/TooltipContent.tsx:
##########
@@ -23,15 +23,28 @@ import { extendedDayjs } from '../../utils/dates';
 
 interface Props {
   cachedTimestamp?: string;
+  cacheSource?: 'query' | 'semantic';
 }
-export const TooltipContent: FC<Props> = ({ cachedTimestamp }) => {
+export const TooltipContent: FC<Props> = ({
+  cachedTimestamp,
+  cacheSource = 'query',
+}) => {
+  const loadedFromText =
+    cacheSource === 'semantic'
+      ? t('Loaded from semantic smart cache')
+      : t('Loaded data cached');
+  const loadedFallbackText =
+    cacheSource === 'semantic'
+      ? t('Loaded from semantic smart cache')
+      : t('Loaded from cache');

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Semantic duplication in diff</b></div>
   <div id="fix">
   
   Lines 32-39 introduce semantic duplication: `loadedFromText` and 
`loadedFallbackText` contain nearly identical conditional logic checking 
`cacheSource === 'semantic'`. Both must be updated together if cache source 
logic changes, creating divergence risk. Consider consolidating into a single 
helper or inline ternary.
   </div>
   
   
   <details>
   <summary>
   <b>Code suggestion</b>
   </summary>
   <blockquote>Check the AI-generated fix before applying</blockquote>
   <div id="code">
   
   
   ```
    --- 
superset-frontend/packages/superset-ui-core/src/components/CachedLabel/TooltipContent.tsx
 (lines 32-39) ---
    32: -  const loadedFromText =
    33: -    cacheSource === 'semantic'
    34: -      ? t('Loaded from semantic smart cache')
    35: -      : t('Loaded data cached');
    36: -  const loadedFallbackText =
    37: -    cacheSource === 'semantic'
    38: -      ? t('Loaded from semantic smart cache')
    39: -      : t('Loaded from cache');
    40: +
    41: +  const getLoadedText = (useFallback: boolean) => {
    42: +    if (cacheSource === 'semantic') {
    43: +      return t('Loaded from semantic smart cache');
    44: +    }
    45: +    return useFallback ? t('Loaded from cache') : t('Loaded data 
cached');
    46: +  };
    47: +
    48:    const cachedText = cachedTimestamp ? (
    49:      <span>
    50: -      {loadedFromText}
    51: +      {getLoadedText(false)}
    52:        <b> {extendedDayjs.utc(cachedTimestamp).fromNow()}</b>
    53:      </span>
    54:    ) : (
    55: -    loadedFallbackText
    56: +    getLoadedText(true)
    57:    );
   ```
   
   </div>
   </details>
   
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #983322</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



##########
superset-frontend/packages/superset-ui-core/src/components/CachedLabel/TooltipContent.test.tsx:
##########
@@ -36,3 +36,14 @@ test('Rendering TooltipContent correctly - with timestep', 
() => {
       .fromNow()}. Click to force-refresh`,
   );
 });
+
+test('Rendering TooltipContent correctly - semantic cache', () => {
+  render(
+    <TooltipContent cacheSource="semantic" cachedTimestamp="01-01-2000" />,
+  );
+  expect(screen.getByTestId('tooltip-content')?.textContent).toBe(

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>data-testid attribute mismatch</b></div>
   <div id="fix">
   
   The component uses `data-test="tooltip-content"` but `getByTestId` from 
React Testing Library searches for `data-testid`. This mismatch causes the test 
to fail at runtime — the query returns `undefined` instead of finding the 
element. All three test cases (lines 26, 33, 44) use this pattern and will fail.
   </div>
   
   
   <details>
   <summary>
   <b>Code suggestion</b>
   </summary>
   <blockquote>Check the AI-generated fix before applying</blockquote>
   <div id="code">
   
   
   ```
    --- 
superset-frontend/packages/superset-ui-core/src/components/CachedLabel/TooltipContent.tsx
    +++ 
superset-frontend/packages/superset-ui-core/src/components/CachedLabel/TooltipContent.tsx
    @@ -48,7 +48,7 @@ export const TooltipContent: FC<Props> = ({
       );
    
       return (
    -    <span data-test="tooltip-content">
    +    <span data-testid="tooltip-content">
           {cachedText}. {t('Click to force-refresh')}
         </span>
       );
     };
   ```
   
   </div>
   </details>
   
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #983322</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



##########
superset/semantic_layers/cache.py:
##########
@@ -0,0 +1,759 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Containment-aware cache for semantic view queries.
+
+A broader cached result can satisfy a narrower new query: when the new query's
+filters and limit are strictly more restrictive than a cached entry's, the 
cached
+DataFrame is post-filtered and re-limited rather than re-executing the 
underlying
+query.
+
+See ``docs/`` and the plan file for the design rationale; the rules summary:
+
+* Same metrics and dimensions (shape).
+* Each cached filter must be implied by a new-query filter on the same column.
+* New filters on columns with no cached constraint are applied post-fetch as
+  "leftovers" — provided the column is in the projection.
+* Cached ``limit`` must be at least the new ``limit``; if a cached ``limit`` is
+  present, the orderings must match (otherwise the cached "top N" is not the
+  true top of the new query).
+* ``ADHOC`` and ``HAVING`` filters require exact-set equality.
+* ``offset != 0`` and mismatching ``group_limit`` skip the cache.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time as _time
+from dataclasses import dataclass, field
+from datetime import date, datetime, time, timedelta
+from enum import Enum
+from typing import Any, Iterable
+
+import pandas as pd
+import pyarrow as pa
+from flask import current_app
+from superset_core.semantic_layers.types import (
+    AdhocExpression,
+    AggregationType,
+    Dimension,
+    Filter,
+    Metric,
+    Operator,
+    OrderDirection,
+    OrderTuple,
+    PredicateType,
+    SemanticQuery,
+    SemanticRequest,
+    SemanticResult,
+)
+
+from superset.extensions import cache_manager
+from superset.utils import json
+from superset.utils.hashing import hash_from_str
+from superset.utils.pandas_postprocessing.aggregate import aggregate
+
+logger = logging.getLogger(__name__)
+
+INDEX_KEY_PREFIX = "sv:idx:"
+VALUE_KEY_PREFIX = "sv:val:"
+MAX_ENTRIES_PER_VIEW = 128
+
+_AGGREGATION_TO_PANDAS: dict[AggregationType, str] = {
+    AggregationType.SUM: "sum",
+    AggregationType.COUNT: "sum",
+    AggregationType.MIN: "min",
+    AggregationType.MAX: "max",
+}
+ADDITIVE_AGGREGATIONS = frozenset(_AGGREGATION_TO_PANDAS)
+
+
+@dataclass(frozen=True)
+class ViewMeta:
+    """Identity/freshness/TTL info pulled from the SemanticView ORM row."""
+
+    uuid: str
+    changed_on_iso: str
+    cache_timeout: int | None
+
+
+class ReuseMode(Enum):
+    """How a cached entry maps onto the new query."""
+
+    EXACT = "exact"  # same dims and metrics; only leftovers/limit/order to 
apply
+    PROJECT = "project"  # same dims, cached metrics ⊃ new metrics; drop 
columns
+    ROLLUP = "rollup"  # cached dims ⊃ new dims; re-aggregate
+
+
+# Preference order when multiple cached entries satisfy a query: pick the one
+# requiring the least post-processing.
+_MODE_RANK: dict[ReuseMode, int] = {
+    ReuseMode.EXACT: 0,
+    ReuseMode.PROJECT: 1,
+    ReuseMode.ROLLUP: 2,
+}
+
+
+@dataclass(frozen=True)
+class CachedEntry:
+    filters: frozenset[Filter]
+    dimension_keys: frozenset[str]
+    metric_ids: frozenset[str]
+    limit: int | None
+    offset: int
+    order_key: str
+    group_limit_key: str
+    value_key: str
+    timestamp: float = field(default_factory=_time.time)
+
+
+# ---------------------------------------------------------------------------
+# Public surface
+# ---------------------------------------------------------------------------
+
+
+def try_serve_from_cache(
+    view_meta: ViewMeta,
+    query: SemanticQuery,
+) -> SemanticResult | None:
+    """Return a cached ``SemanticResult`` that satisfies ``query`` if any."""
+    try:
+        cache = cache_manager.data_cache
+        idx_key = shape_key(view_meta, query)
+        entries: list[CachedEntry] | None = cache.get(idx_key)
+        if not entries:
+            return None
+
+        # Rank candidates so EXACT is preferred over PROJECT over ROLLUP. With
+        # one bucket per view, multiple entries may satisfy a query; pick the
+        # one needing the least post-processing.
+        candidates: list[tuple[int, CachedEntry, set[Filter], ReuseMode]] = []
+        for entry in entries:
+            ok, leftovers, mode = can_satisfy(entry, query)
+            if ok:
+                candidates.append((_MODE_RANK[mode], entry, leftovers, mode))
+        candidates.sort(key=lambda c: c[0])
+
+        served: SemanticResult | None = None
+        for _, entry, leftovers, mode in candidates:
+            payload = cache.get(entry.value_key)
+            if payload is None:
+                continue
+            if mode == ReuseMode.ROLLUP and not _projection_input_complete(
+                entry, payload
+            ):
+                continue
+            served = _apply_post_processing(payload, query, leftovers, mode)
+            break
+
+        # Drop index entries whose values have been evicted.
+        pruned = [e for e in entries if cache.get(e.value_key) is not None]
+        if len(pruned) != len(entries):
+            cache.set(idx_key, pruned, timeout=_timeout(view_meta))
+        return served
+    except Exception:  # pragma: no cover - defensive
+        logger.warning("Semantic view cache lookup failed", exc_info=True)
+        return None
+
+
+def store_result(
+    view_meta: ViewMeta,
+    query: SemanticQuery,
+    result: SemanticResult,
+) -> None:
+    """Persist ``result`` under a fresh value key and register a descriptor."""
+    try:
+        cache = cache_manager.data_cache
+        timeout = _timeout(view_meta)
+        vkey = value_key(view_meta, query)
+        cache.set(vkey, result, timeout=timeout)
+
+        idx_key = shape_key(view_meta, query)
+        entries: list[CachedEntry] = list(cache.get(idx_key) or [])
+        entry = CachedEntry(
+            filters=frozenset(query.filters or set()),
+            dimension_keys=frozenset(_dimension_key(d) for d in 
query.dimensions),
+            metric_ids=frozenset(m.id for m in query.metrics),
+            limit=query.limit,
+            offset=query.offset or 0,
+            order_key=_order_key(query.order),
+            group_limit_key=_group_limit_key(query.group_limit),
+            value_key=vkey,
+        )
+        entries = [e for e in entries if e.value_key != vkey]
+        entries.append(entry)
+        if len(entries) > MAX_ENTRIES_PER_VIEW:
+            entries = sorted(entries, key=lambda e: 
e.timestamp)[-MAX_ENTRIES_PER_VIEW:]
+        cache.set(idx_key, entries, timeout=timeout)
+    except Exception:  # pragma: no cover - defensive

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Avoid catching blind exception</b></div>
   <div id="fix">
   
   Replace the broad `Exception` catch with specific exception types to improve 
error handling. Similar issue exists at line 170.
   </div>
   
   
   <details>
   <summary>
   <b>Code suggestion</b>
   </summary>
   <blockquote>Check the AI-generated fix before applying</blockquote>
   <div id="code">
   
   
   ````suggestion
       except (KeyError, ValueError, TypeError):  # pragma: no cover - defensive
   ````
   
   </div>
   </details>
   
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #983322</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(semantic-layers): containment-aware query cache [superset]

Reply via email to