This is an automated email from the ASF dual-hosted git repository.
bbovenzi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new dc27827de26 UI: Fix Dag code highlighting for f-strings (#68026)
dc27827de26 is described below
commit dc27827de265f9aa49ecb1f4698c521c939df3fb
Author: Revanth <[email protected]>
AuthorDate: Thu Jun 4 13:40:45 2026 -0500
UI: Fix Dag code highlighting for f-strings (#68026)
---
.../src/components/MonacoEditor/configureMonaco.ts | 40 +++--
.../components/MonacoEditor/pythonFStrings.test.ts | 183 +++++++++++++++++++++
.../src/components/MonacoEditor/pythonFStrings.ts | 100 +++++++++++
airflow-core/src/airflow/ui/src/vite-env.d.ts | 11 ++
4 files changed, 323 insertions(+), 11 deletions(-)
diff --git
a/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
b/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
index 66af2adee05..737de9e020c 100644
--- a/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
+++ b/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
@@ -18,6 +18,8 @@
*/
import { loader } from "@monaco-editor/react";
+import { patchPythonFStrings } from "./pythonFStrings";
+
type MonacoEnvironment = {
readonly getWorker: (_moduleId: string, label: string) => Worker;
};
@@ -50,18 +52,18 @@ const loadMonacoModules = async () => {
import("monaco-editor/esm/vs/language/json/json.worker.js?worker&url").then((module)
=> module.default),
]);
- const languageContributions = Promise.all([
- import("monaco-editor/esm/vs/basic-languages/python/python.contribution"),
- import("monaco-editor/esm/vs/language/json/monaco.contribution"),
- ]);
+ // The JSON contribution registers its language as a side effect. Python is
registered
+ // manually below from its grammar module instead of importing
`python.contribution`,
+ // whose lazy tokens provider would overwrite our patched grammar on first
use.
+ // The grammar module is a private monaco internal (verified against
monaco-editor
+ // 0.52.2); the runtime guard below fails loudly if its export shape changes.
+ const jsonContribution =
import("monaco-editor/esm/vs/language/json/monaco.contribution");
+ const pythonGrammar =
import("monaco-editor/esm/vs/basic-languages/python/python.js");
- const [monaco, [editorWorkerUrl, jsonWorkerUrl]] = await Promise.all([
- monacoApi,
- workerUrls,
- languageContributions,
- ]);
+ const [monaco, [editorWorkerUrl, jsonWorkerUrl], { conf: pythonConf,
language: pythonLanguage }] =
+ await Promise.all([monacoApi, workerUrls, pythonGrammar,
jsonContribution]);
- return { editorWorkerUrl, jsonWorkerUrl, monaco };
+ return { editorWorkerUrl, jsonWorkerUrl, monaco, pythonConf, pythonLanguage
};
};
const createWorkerFromUrl = (workerUrl: string): Worker => {
@@ -78,12 +80,28 @@ export const configureMonaco = () => {
}
configurationPromise = loadMonacoModules()
- .then(({ editorWorkerUrl, jsonWorkerUrl, monaco }) => {
+ .then(({ editorWorkerUrl, jsonWorkerUrl, monaco, pythonConf,
pythonLanguage }) => {
Reflect.set(globalThis, "MonacoEnvironment", {
getWorker: (_moduleId: string, label: string) =>
createWorkerFromUrl(label === "json" ? jsonWorkerUrl :
editorWorkerUrl),
} satisfies MonacoEnvironment);
+ // Register Python with the patched grammar (triple-quoted f-string
support). The
+ // editor always sets `language="python"` explicitly, so no
extensions/firstLine
+ // auto-detection metadata is needed. Guard the internal grammar export
shape: if a
+ // monaco upgrade drops these, fail loudly here rather than silently
disabling
+ // Python highlighting (`setMonarchTokensProvider("python", undefined)`).
+ // The `conf`/`language` types come from a hand-written ambient
declaration, so
+ // TypeScript believes they are always defined; this guard checks the
real runtime
+ // shape the types cannot vouch for.
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+ if (pythonConf === undefined || pythonLanguage === undefined) {
+ throw new Error("monaco Python grammar module changed shape: missing
`conf`/`language` export");
+ }
+ monaco.languages.register({ id: "python" });
+ monaco.languages.setLanguageConfiguration("python", pythonConf);
+ monaco.languages.setMonarchTokensProvider("python",
patchPythonFStrings(pythonLanguage));
+
loader.config({ monaco });
})
.catch((error: unknown) => {
diff --git
a/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts
b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts
new file mode 100644
index 00000000000..0fb4184a660
--- /dev/null
+++
b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts
@@ -0,0 +1,183 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import type { languages } from "monaco-editor";
+import { beforeAll, describe, expect, it } from "vitest";
+
+import { patchPythonFStrings } from "./pythonFStrings";
+
+// A minimal stand-in for Monaco's bundled Python grammar, used only by the
+// structural assertions in the first `describe` below (added states, rule order,
+// immutability). It is never handed to the Monarch tokenizer: the patched
+// `strings` state also targets `@stringBody` / `@dblStringBody`, which this stub
+// omits. It is not a faithful copy of the real `strings` state (which the patch
+// replaces wholesale).
+const buildBaseLanguage = (): languages.IMonarchLanguage => ({
+ defaultToken: "",
+ tokenizer: {
+ fDblStringBody: [[/"/u, "string.escape", "@popall"]],
+ fStringBody: [[/'/u, "string.escape", "@popall"]],
+ fStringDetail: [[/x/u, "identifier", "@pop"]],
+ root: [{ include: "@strings" }],
+ strings: [
+ [/f"{1,3}/u, "string.escape", "@fDblStringBody"],
+ [/f'{1,3}/u, "string.escape", "@fStringBody"],
+ ],
+ },
+});
+
+// Maps each rule to the `source` of its leading RegExp. Rules that are not arrays
+// starting with a RegExp (for example `{ include: ... }` objects) map to "".
+const firstSources = (rules: Array<languages.IMonarchLanguageRule>):
Array<string> =>
+ rules.map((rule) => (Array.isArray(rule) && rule[0] instanceof RegExp ?
rule[0].source : ""));
+
+// Token types recorded for line `index`; an empty list when that line is missing.
+const typesAt = (lines: Array<Array<string>>, index: number): Array<string> =>
lines[index] ?? [];
+
+// Reduces one tokenized line to the list of its token `type` strings.
+const tokenTypes = (line: Array<{ type: string }>): Array<string> =>
line.map((token) => token.type);
+
+// Structural checks: these inspect the returned grammar object directly and never
+// run the Monarch tokenizer.
+describe("patchPythonFStrings", () => {
+ it("does not mutate the input grammar", () => {
+ const base = buildBaseLanguage();
+ // A RegExp has no enumerable own properties and would serialize as `{}`, so
+ // the replacer substitutes its `source` to keep the rules in the snapshot.
+ const snapshot = JSON.stringify(base, (_key, value: unknown) =>
+ value instanceof RegExp ? value.source : value,
+ );
+
+ patchPythonFStrings(base);
+
+ expect(
+ JSON.stringify(base, (_key, value: unknown) => (value instanceof RegExp
? value.source : value)),
+ ).toBe(snapshot);
+ });
+
+ it("adds dedicated triple-quoted f-string states for both quote styles", ()
=> {
+ const patched = patchPythonFStrings(buildBaseLanguage());
+
+ expect(patched.tokenizer.fStringBodyTriple).toBeDefined();
+ expect(patched.tokenizer.fDblStringBodyTriple).toBeDefined();
+ });
+
+ it("preserves the base single-line f-string states", () => {
+ const patched = patchPythonFStrings(buildBaseLanguage());
+
+ expect(patched.tokenizer.fStringBody).toBeDefined();
+ expect(patched.tokenizer.fDblStringBody).toBeDefined();
+ expect(patched.tokenizer.fStringDetail).toBeDefined();
+ });
+
+ it.each(["fStringBody", "fDblStringBody"])(
+ "prepends escaped-brace rules to the single-line %s state",
+ (state) => {
+ const patched = patchPythonFStrings(buildBaseLanguage());
+ const sources = firstSources(patched.tokenizer[state] as
Array<languages.IMonarchLanguageRule>);
+
+ // The expected values are the regex sources of `/\{\{/` and `/\}\}/`.
+ expect(sources[0]).toBe("\\{\\{");
+ expect(sources[1]).toBe("\\}\\}");
+ },
+ );
+
+ it("routes triple quotes before single quotes in the strings state", () => {
+ const patched = patchPythonFStrings(buildBaseLanguage());
+ const sources = firstSources(patched.tokenizer.strings as
Array<languages.IMonarchLanguageRule>);
+
+ // The single-quote f-string rules are located by their exact regex source;
+ // the triple-quote rules by the quote run their source contains.
+ const tripleDbl = sources.findIndex((source) => source.includes('"""'));
+ const singleDbl = sources.indexOf('[Ff]"');
+ const tripleSingle = sources.findIndex((source) => source.includes("'''"));
+ const singleSingle = sources.indexOf("[Ff]'");
+
+ expect(tripleDbl).toBeGreaterThanOrEqual(0);
+ expect(tripleSingle).toBeGreaterThanOrEqual(0);
+ expect(tripleDbl).toBeLessThan(singleDbl);
+ expect(tripleSingle).toBeLessThan(singleSingle);
+ });
+
+ it.each([
+ ["fDblStringBodyTriple", '"""'],
+ ["fStringBodyTriple", "'''"],
+ ])("closes %s on its triple quote and matches escaped braces before
interpolation", (state, quote) => {
+ const patched = patchPythonFStrings(buildBaseLanguage());
+ const sources = firstSources(patched.tokenizer[state] as
Array<languages.IMonarchLanguageRule>);
+
+ // `body` is the first rule whose source starts with a negated character class.
+ const close = sources.findIndex((source) => source.includes(quote));
+ const escapedOpen = sources.findIndex((source) =>
source.includes("\\{\\{"));
+ const interpolation = sources.findIndex((source) =>
source.includes("[^!':=}]"));
+ const body = sources.findIndex((source) => source.startsWith("[^"));
+
+ // The closing-quote rule has to be the very first rule of the state.
+ expect(close).toBe(0);
+ expect(close).toBeLessThan(body);
+ expect(escapedOpen).toBeGreaterThanOrEqual(0);
+ expect(escapedOpen).toBeLessThan(interpolation);
+ });
+});
+
+// Exercises the patched grammar through Monaco's real Monarch tokenizer to prove
+// the fix end to end. Mirrors files/dags/fstring_repro.py from issue #67986.
+// Loads full monaco and registers the language once for the whole suite.
+describe("patchPythonFStrings (tokenized)", () => {
+ // Token type names for each line of the multi-line sample, indexed by line.
+ let lines: Array<Array<string>> = [];
+ // Raw tokens (offset + type) of the single-line sample.
+ let singleLineTokens: Array<{ offset: number; type: string }> = [];
+
+ // The trailing `30_000` is the hook timeout in milliseconds.
+ beforeAll(async () => {
+ const monaco = await import("monaco-editor/esm/vs/editor/editor.api");
+ const { conf, language } = await
import("monaco-editor/esm/vs/basic-languages/python/python.js");
+
+ monaco.languages.register({ id: "python" });
+ monaco.languages.setLanguageConfiguration("python", conf);
+ monaco.languages.setMonarchTokensProvider("python",
patchPythonFStrings(language));
+
+ const source = [
+ ' sql = f"""',
+ " SELECT CASE WHEN COUNT(*) = 0 THEN 1 ELSE 0 END",
+ " FROM {table_name}",
+ " WHERE ds = '{{{{ ds }}}}'",
+ ' """',
+ " return sql",
+ ].join("\n");
+
+ lines = monaco.editor.tokenize(source, "python").map(tokenTypes);
+
+ // Single-line f-string: `{{` at offset 2 (after `f"`) is a literal brace,
+ // `{var}` is a real interpolation.
+ singleLineTokens = monaco.editor.tokenize('f"{{lit}} {var}"', "python")[0]
?? [];
+ }, 30_000);
+
+ it("keeps the whole multi-line f-string body string-colored", () => {
+ // A plain SQL line and the interpolation line both stay inside the string,
+ // rather than being re-tokenized as Python code after the first line.
+ expect(typesAt(lines, 1)).toEqual(["string.python"]);
+ expect(typesAt(lines, 2)).toContain("string.python");
+ // Escaped braces `{{{{ ds }}}}` are literal string content, not interpolation.
+ expect(typesAt(lines, 3)).toEqual(["string.python"]);
+ });
+
+ it("colors interpolations inside the f-string as identifiers", () => {
+ // `FROM ` is string, `{table_name}` is an interpolation.
+ expect(typesAt(lines, 2)).toContain("identifier.python");
+ });
+
+ it("ends the string at the closing triple quote so following code is not
string-colored", () => {
+ // The closing `"""` terminates the string...
+ expect(typesAt(lines, 4)).toContain("string.escape.python");
+ // ...so `return sql` is real Python again, not part of the string.
+ expect(typesAt(lines, 5)).toContain("keyword.python");
+ expect(typesAt(lines, 5)).not.toContain("string.python");
+ });
+
+ it("treats single-line escaped braces as string and real interpolation as
identifier", () => {
+ // `{{` (offset 2) is a literal brace rendered as string, not interpolation.
+ const braceToken = singleLineTokens.find((token) => token.offset === 2);
+
+ expect(braceToken?.type).toBe("string.python");
+ // `{var}` is still a real interpolation.
+ expect(singleLineTokens.some((token) => token.type ===
"identifier.python")).toBe(true);
+ });
+});
diff --git
a/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts
b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts
new file mode 100644
index 00000000000..46bed43c658
--- /dev/null
+++ b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts
@@ -0,0 +1,100 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import type { languages } from "monaco-editor";
+
+// Escaped f-string braces: `{{` / `}}` are literal `{` / `}`, not interpolation.
+// The interpolation rule used in the triple states (`\{[^!':=}]+`) does not
+// exclude a second `{`, so it would also match `{{`. Monarch tries the rules of a
+// state in order, so these rules have to be listed ahead of the interpolation
+// rule for the two-brace match to win. A lone `{` matches neither rule here and
+// still falls through to interpolation. Hence they sit at the front of a
+// single-line state and before the interpolation rule in a triple state.
+const ESCAPED_BRACE_RULES: Array<languages.IMonarchLanguageRule> = [
+ [/\{\{/u, "string"],
+ [/\}\}/u, "string"],
+];
+
+/**
+ * Monaco's bundled Python Monarch grammar mishandles f-strings two ways:
+ *
+ * 1. Triple-quoted f-strings: `f"""..."""` is routed to the single-line f-string
+ *    state whose first rule does `@popall` at end of line, so a multi-line
+ *    f-string loses tokenizer sync after its first line: the string leaks past
+ *    its closing `"""` (coloring following code as string) or terminates early.
+ * 2. Escaped braces: `{{` / `}}` are mis-parsed as interpolation rather than as
+ *    literal string content.
+ *
+ * See https://github.com/apache/airflow/issues/67986.
+ *
+ * This patch:
+ * - rewrites the `strings` state so triple quotes route to two newly added
+ *   multi-line states (`fStringBodyTriple` / `fDblStringBodyTriple`); and
+ * - prepends escaped-brace rules to the single-line `fStringBody` /
+ *   `fDblStringBody` states.
+ *
+ * Every other state is left untouched. The triple states stay inside the string
+ * across line breaks, treat escaped `{{` / `}}` as literal string content, color
+ * `{...}` interpolations via the existing `fStringDetail` state, and exit only on
+ * the matching triple quote. Returns a new grammar object; `language` is never
+ * mutated.
+ *
+ * Caveats:
+ * - This reaches into monaco's bundled basic-language grammar and references its
+ *   internal state names (`fStringDetail`, `fStringBody`, `fDblStringBody`, and,
+ *   from the rewritten `strings` state, `stringBody` / `dblStringBody`).
+ *   Verified against monaco-editor 0.52.2; recheck on monaco upgrades (the
+ *   tokenizer test in this folder is the guard).
+ * - Raw f-strings (`rf"""` / `fr"""`) are out of scope and remain unhandled, as
+ *   they already were in the bundled grammar.
+ *
+ * @param language - Monaco's Python Monarch grammar. Only its `tokenizer` map is
+ *   read; a missing `fStringBody` / `fDblStringBody` state is treated as empty.
+ * @returns A shallow copy of `language` whose `tokenizer` carries the rewritten
+ *   `strings` state, the two triple-quoted states and the extended single-line
+ *   states. Rule arrays of unchanged states are shared by reference with the input.
+ */
+export const patchPythonFStrings = (language: languages.IMonarchLanguage):
languages.IMonarchLanguage => {
+ const tokenizer: languages.IMonarchLanguage["tokenizer"] = {
+ ...language.tokenizer,
+ fDblStringBody: [...ESCAPED_BRACE_RULES,
...(language.tokenizer.fDblStringBody ?? [])],
+ fDblStringBodyTriple: [
+ [/"""/u, "string.escape", "@popall"],
+ ...ESCAPED_BRACE_RULES,
+ [/\{[^!':=}]+/u, "identifier", "@fStringDetail"],
+ [/\\./u, "string"],
+ [/\\$/u, "string"],
+ [/[^"\\{}]+/u, "string"],
+ [/["{}]/u, "string"],
+ ],
+ fStringBody: [...ESCAPED_BRACE_RULES, ...(language.tokenizer.fStringBody
?? [])],
+ fStringBodyTriple: [
+ [/'''/u, "string.escape", "@popall"],
+ ...ESCAPED_BRACE_RULES,
+ [/\{[^!':=}]+/u, "identifier", "@fStringDetail"],
+ [/\\./u, "string"],
+ [/\\$/u, "string"],
+ [/[^'\\{}]+/u, "string"],
+ [/['{}]/u, "string"],
+ ],
+ // Triple-quote rules must precede the single-quote rules so `f"""` is not
+ // consumed as `f"` + `""`. `[Ff]` covers both `f"""` and `F"""`.
+ strings: [
+ [/'$/u, "string.escape", "@popall"],
+ [/[Ff]'''/u, "string.escape", "@fStringBodyTriple"],
+ [/[Ff]'/u, "string.escape", "@fStringBody"],
+ [/'/u, "string.escape", "@stringBody"],
+ [/"$/u, "string.escape", "@popall"],
+ [/[Ff]"""/u, "string.escape", "@fDblStringBodyTriple"],
+ [/[Ff]"/u, "string.escape", "@fDblStringBody"],
+ [/"/u, "string.escape", "@dblStringBody"],
+ ],
+ };
+
+ return { ...language, tokenizer };
+};
diff --git a/airflow-core/src/airflow/ui/src/vite-env.d.ts
b/airflow-core/src/airflow/ui/src/vite-env.d.ts
index c7f6825d014..e520ef77aa6 100644
--- a/airflow-core/src/airflow/ui/src/vite-env.d.ts
+++ b/airflow-core/src/airflow/ui/src/vite-env.d.ts
@@ -29,3 +29,14 @@ interface ImportMeta {
// no typings of their own.
declare module "monaco-editor/esm/vs/editor/contrib/folding/browser/folding";
declare module "monaco-editor/esm/vs/base/browser/ui/codicons/codiconStyles";
+
+// The Python basic-language module exports its Monarch grammar (`conf` / `language`)
+// but ships no `.d.ts` of its own.
+declare module "monaco-editor/esm/vs/basic-languages/python/python.js" {
+ // `import(...)` type syntax is required here: a top-level `import type` would
+ // turn this ambient declaration file into a module and break the `ImportMeta`
+ // augmentation above.
+ /* eslint-disable @typescript-eslint/consistent-type-imports */
+ export const conf: import("monaco-editor").languages.LanguageConfiguration;
+ export const language: import("monaco-editor").languages.IMonarchLanguage;
+ /* eslint-enable @typescript-eslint/consistent-type-imports */
+}