This is an automated email from the ASF dual-hosted git repository.

bbovenzi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new dc27827de26 UI: Fix Dag code highlighting for f-strings (#68026)
dc27827de26 is described below

commit dc27827de265f9aa49ecb1f4698c521c939df3fb
Author: Revanth <[email protected]>
AuthorDate: Thu Jun 4 13:40:45 2026 -0500

    UI: Fix Dag code highlighting for f-strings (#68026)
---
 .../src/components/MonacoEditor/configureMonaco.ts |  40 +++--
 .../components/MonacoEditor/pythonFStrings.test.ts | 183 +++++++++++++++++++++
 .../src/components/MonacoEditor/pythonFStrings.ts  | 100 +++++++++++
 airflow-core/src/airflow/ui/src/vite-env.d.ts      |  11 ++
 4 files changed, 323 insertions(+), 11 deletions(-)

diff --git a/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts b/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
index 66af2adee05..737de9e020c 100644
--- a/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
+++ b/airflow-core/src/airflow/ui/src/components/MonacoEditor/configureMonaco.ts
@@ -18,6 +18,8 @@
  */
 import { loader } from "@monaco-editor/react";
 
+import { patchPythonFStrings } from "./pythonFStrings";
+
 type MonacoEnvironment = {
   readonly getWorker: (_moduleId: string, label: string) => Worker;
 };
@@ -50,18 +52,18 @@ const loadMonacoModules = async () => {
     
import("monaco-editor/esm/vs/language/json/json.worker.js?worker&url").then((module)
 => module.default),
   ]);
 
-  const languageContributions = Promise.all([
-    import("monaco-editor/esm/vs/basic-languages/python/python.contribution"),
-    import("monaco-editor/esm/vs/language/json/monaco.contribution"),
-  ]);
+  // The JSON contribution registers its language as a side effect. Python is registered
+  // manually below from its grammar module instead of importing `python.contribution`,
+  // whose lazy tokens provider would overwrite our patched grammar on first use.
+  // The grammar module is a private monaco internal (verified against monaco-editor
+  // 0.52.2); the runtime guard below fails loudly if its export shape changes.
+  const jsonContribution = 
import("monaco-editor/esm/vs/language/json/monaco.contribution");
+  const pythonGrammar = 
import("monaco-editor/esm/vs/basic-languages/python/python.js");
 
-  const [monaco, [editorWorkerUrl, jsonWorkerUrl]] = await Promise.all([
-    monacoApi,
-    workerUrls,
-    languageContributions,
-  ]);
+  const [monaco, [editorWorkerUrl, jsonWorkerUrl], { conf: pythonConf, 
language: pythonLanguage }] =
+    await Promise.all([monacoApi, workerUrls, pythonGrammar, 
jsonContribution]);
 
-  return { editorWorkerUrl, jsonWorkerUrl, monaco };
+  return { editorWorkerUrl, jsonWorkerUrl, monaco, pythonConf, pythonLanguage 
};
 };
 
 const createWorkerFromUrl = (workerUrl: string): Worker => {
@@ -78,12 +80,28 @@ export const configureMonaco = () => {
   }
 
   configurationPromise = loadMonacoModules()
-    .then(({ editorWorkerUrl, jsonWorkerUrl, monaco }) => {
+    .then(({ editorWorkerUrl, jsonWorkerUrl, monaco, pythonConf, 
pythonLanguage }) => {
       Reflect.set(globalThis, "MonacoEnvironment", {
         getWorker: (_moduleId: string, label: string) =>
           createWorkerFromUrl(label === "json" ? jsonWorkerUrl : 
editorWorkerUrl),
       } satisfies MonacoEnvironment);
 
+      // Register Python with the patched grammar (triple-quoted f-string support). The
+      // editor always sets `language="python"` explicitly, so no extensions/firstLine
+      // auto-detection metadata is needed. Guard the internal grammar export shape: if a
+      // monaco upgrade drops these, fail loudly here rather than silently disabling
+      // Python highlighting (`setMonarchTokensProvider("python", undefined)`).
+      // The `conf`/`language` types come from a hand-written ambient declaration, so
+      // TypeScript believes they are always defined; this guard checks the real runtime
+      // shape the types cannot vouch for.
+      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+      if (pythonConf === undefined || pythonLanguage === undefined) {
+        throw new Error("monaco Python grammar module changed shape: missing 
`conf`/`language` export");
+      }
+      monaco.languages.register({ id: "python" });
+      monaco.languages.setLanguageConfiguration("python", pythonConf);
+      monaco.languages.setMonarchTokensProvider("python", 
patchPythonFStrings(pythonLanguage));
+
       loader.config({ monaco });
     })
     .catch((error: unknown) => {
diff --git a/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts
new file mode 100644
index 00000000000..0fb4184a660
--- /dev/null
+++ b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.test.ts
@@ -0,0 +1,183 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import type { languages } from "monaco-editor";
+import { beforeAll, describe, expect, it } from "vitest";
+
+import { patchPythonFStrings } from "./pythonFStrings";
+
+// A minimal stand-in for Monaco's bundled Python grammar: enough states for the
+// patch's references to resolve and for the immutability check below. It is not a
+// faithful copy of the real `strings` state (which the patch replaces wholesale).
+const buildBaseLanguage = (): languages.IMonarchLanguage => ({
+  defaultToken: "",
+  tokenizer: {
+    fDblStringBody: [[/"/u, "string.escape", "@popall"]],
+    fStringBody: [[/'/u, "string.escape", "@popall"]],
+    fStringDetail: [[/x/u, "identifier", "@pop"]],
+    root: [{ include: "@strings" }],
+    strings: [
+      [/f"{1,3}/u, "string.escape", "@fDblStringBody"],
+      [/f'{1,3}/u, "string.escape", "@fStringBody"],
+    ],
+  },
+});
+
+const firstSources = (rules: Array<languages.IMonarchLanguageRule>): 
Array<string> =>
+  rules.map((rule) => (Array.isArray(rule) && rule[0] instanceof RegExp ? 
rule[0].source : ""));
+
+const typesAt = (lines: Array<Array<string>>, index: number): Array<string> => 
lines[index] ?? [];
+
+const tokenTypes = (line: Array<{ type: string }>): Array<string> => 
line.map((token) => token.type);
+
+describe("patchPythonFStrings", () => {
+  it("does not mutate the input grammar", () => {
+    const base = buildBaseLanguage();
+    const snapshot = JSON.stringify(base, (_key, value: unknown) =>
+      value instanceof RegExp ? value.source : value,
+    );
+
+    patchPythonFStrings(base);
+
+    expect(
+      JSON.stringify(base, (_key, value: unknown) => (value instanceof RegExp 
? value.source : value)),
+    ).toBe(snapshot);
+  });
+
+  it("adds dedicated triple-quoted f-string states for both quote styles", () 
=> {
+    const patched = patchPythonFStrings(buildBaseLanguage());
+
+    expect(patched.tokenizer.fStringBodyTriple).toBeDefined();
+    expect(patched.tokenizer.fDblStringBodyTriple).toBeDefined();
+  });
+
+  it("preserves the base single-line f-string states", () => {
+    const patched = patchPythonFStrings(buildBaseLanguage());
+
+    expect(patched.tokenizer.fStringBody).toBeDefined();
+    expect(patched.tokenizer.fDblStringBody).toBeDefined();
+    expect(patched.tokenizer.fStringDetail).toBeDefined();
+  });
+
+  it.each(["fStringBody", "fDblStringBody"])(
+    "prepends escaped-brace rules to the single-line %s state",
+    (state) => {
+      const patched = patchPythonFStrings(buildBaseLanguage());
+      const sources = firstSources(patched.tokenizer[state] as 
Array<languages.IMonarchLanguageRule>);
+
+      expect(sources[0]).toBe("\\{\\{");
+      expect(sources[1]).toBe("\\}\\}");
+    },
+  );
+
+  it("routes triple quotes before single quotes in the strings state", () => {
+    const patched = patchPythonFStrings(buildBaseLanguage());
+    const sources = firstSources(patched.tokenizer.strings as 
Array<languages.IMonarchLanguageRule>);
+
+    const tripleDbl = sources.findIndex((source) => source.includes('"""'));
+    const singleDbl = sources.indexOf('[Ff]"');
+    const tripleSingle = sources.findIndex((source) => source.includes("'''"));
+    const singleSingle = sources.indexOf("[Ff]'");
+
+    expect(tripleDbl).toBeGreaterThanOrEqual(0);
+    expect(tripleSingle).toBeGreaterThanOrEqual(0);
+    expect(tripleDbl).toBeLessThan(singleDbl);
+    expect(tripleSingle).toBeLessThan(singleSingle);
+  });
+
+  it.each([
+    ["fDblStringBodyTriple", '"""'],
+    ["fStringBodyTriple", "'''"],
+  ])("closes %s on its triple quote and matches escaped braces before 
interpolation", (state, quote) => {
+    const patched = patchPythonFStrings(buildBaseLanguage());
+    const sources = firstSources(patched.tokenizer[state] as 
Array<languages.IMonarchLanguageRule>);
+
+    const close = sources.findIndex((source) => source.includes(quote));
+    const escapedOpen = sources.findIndex((source) => 
source.includes("\\{\\{"));
+    const interpolation = sources.findIndex((source) => 
source.includes("[^!':=}]"));
+    const body = sources.findIndex((source) => source.startsWith("[^"));
+
+    expect(close).toBe(0);
+    expect(close).toBeLessThan(body);
+    expect(escapedOpen).toBeGreaterThanOrEqual(0);
+    expect(escapedOpen).toBeLessThan(interpolation);
+  });
+});
+
+// Exercises the patched grammar through Monaco's real Monarch tokenizer to prove
+// the fix end to end. Mirrors files/dags/fstring_repro.py from issue #67986.
+// Loads full monaco and registers the language once for the whole suite.
+describe("patchPythonFStrings (tokenized)", () => {
+  let lines: Array<Array<string>> = [];
+  let singleLineTokens: Array<{ offset: number; type: string }> = [];
+
+  beforeAll(async () => {
+    const monaco = await import("monaco-editor/esm/vs/editor/editor.api");
+    const { conf, language } = await 
import("monaco-editor/esm/vs/basic-languages/python/python.js");
+
+    monaco.languages.register({ id: "python" });
+    monaco.languages.setLanguageConfiguration("python", conf);
+    monaco.languages.setMonarchTokensProvider("python", 
patchPythonFStrings(language));
+
+    const source = [
+      '    sql = f"""',
+      "    SELECT CASE WHEN COUNT(*) = 0 THEN 1 ELSE 0 END",
+      "    FROM {table_name}",
+      "    WHERE ds = '{{{{ ds }}}}'",
+      '    """',
+      "    return sql",
+    ].join("\n");
+
+    lines = monaco.editor.tokenize(source, "python").map(tokenTypes);
+
+    // Single-line f-string: `{{` at offset 2 (after `f"`) is a literal brace,
+    // `{var}` is a real interpolation.
+    singleLineTokens = monaco.editor.tokenize('f"{{lit}} {var}"', "python")[0] 
?? [];
+  }, 30_000);
+
+  it("keeps the whole multi-line f-string body string-colored", () => {
+    // A plain SQL line and the interpolation line both stay inside the string,
+    // rather than being re-tokenized as Python code after the first line.
+    expect(typesAt(lines, 1)).toEqual(["string.python"]);
+    expect(typesAt(lines, 2)).toContain("string.python");
+    // Escaped braces `{{{{ ds }}}}` are literal string content, not interpolation.
+    expect(typesAt(lines, 3)).toEqual(["string.python"]);
+  });
+
+  it("colors interpolations inside the f-string as identifiers", () => {
+    // `FROM ` is string, `{table_name}` is an interpolation.
+    expect(typesAt(lines, 2)).toContain("identifier.python");
+  });
+
+  it("ends the string at the closing triple quote so following code is not 
string-colored", () => {
+    // The closing `"""` terminates the string...
+    expect(typesAt(lines, 4)).toContain("string.escape.python");
+    // ...so `return sql` is real Python again, not part of the string.
+    expect(typesAt(lines, 5)).toContain("keyword.python");
+    expect(typesAt(lines, 5)).not.toContain("string.python");
+  });
+
+  it("treats single-line escaped braces as string and real interpolation as 
identifier", () => {
+    // `{{` (offset 2) is a literal brace rendered as string, not interpolation.
+    const braceToken = singleLineTokens.find((token) => token.offset === 2);
+
+    expect(braceToken?.type).toBe("string.python");
+    // `{var}` is still a real interpolation.
+    expect(singleLineTokens.some((token) => token.type === 
"identifier.python")).toBe(true);
+  });
+});
diff --git a/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts
new file mode 100644
index 00000000000..46bed43c658
--- /dev/null
+++ b/airflow-core/src/airflow/ui/src/components/MonacoEditor/pythonFStrings.ts
@@ -0,0 +1,100 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import type { languages } from "monaco-editor";
+
+// Escaped f-string braces: `{{` / `}}` are literal `{` / `}`, not interpolation.
+// Matching two braces wins over the single-brace interpolation rule, and a single
+// `{` still falls through to interpolation, so these can sit at the front of a
+// single-line state or before the interpolation rule in a triple state.
+const ESCAPED_BRACE_RULES: Array<languages.IMonarchLanguageRule> = [
+  [/\{\{/u, "string"],
+  [/\}\}/u, "string"],
+];
+
+/**
+ * Monaco's bundled Python Monarch grammar mishandles f-strings two ways:
+ *
+ * 1. Triple-quoted f-strings: `f"""..."""` is routed to the single-line f-string
+ *    state whose first rule does `@popall` at end of line, so a multi-line
+ *    f-string loses tokenizer sync after its first line: the string leaks past
+ *    its closing `"""` (coloring following code as string) or terminates early.
+ * 2. Escaped braces: `{{` / `}}` are mis-parsed as interpolation rather than as
+ *    literal string content.
+ *
+ * See https://github.com/apache/airflow/issues/67986.
+ *
+ * This patch:
+ * - rewrites the `strings` state so triple quotes route to two newly added
+ *   multi-line states (`fStringBodyTriple` / `fDblStringBodyTriple`); and
+ * - prepends escaped-brace rules to the single-line `fStringBody` /
+ *   `fDblStringBody` states.
+ *
+ * Every other state is left untouched. The triple states stay inside the string
+ * across line breaks, treat escaped `{{` / `}}` as literal string content, color
+ * `{...}` interpolations via the existing `fStringDetail` state, and exit only on
+ * the matching triple quote. Returns a new grammar object; `language` is never
+ * mutated.
+ *
+ * Caveats:
+ * - This reaches into monaco's bundled basic-language grammar and references its
+ *   internal state names (`fStringDetail`, `fStringBody`, `fDblStringBody`).
+ *   Verified against monaco-editor 0.52.2; recheck on monaco upgrades (the
+ *   tokenizer test in this folder is the guard).
+ * - Raw f-strings (`rf"""` / `fr"""`) are out of scope and remain unhandled, as
+ *   they already were in the bundled grammar.
+ */
+export const patchPythonFStrings = (language: languages.IMonarchLanguage): 
languages.IMonarchLanguage => {
+  const tokenizer: languages.IMonarchLanguage["tokenizer"] = {
+    ...language.tokenizer,
+    fDblStringBody: [...ESCAPED_BRACE_RULES, 
...(language.tokenizer.fDblStringBody ?? [])],
+    fDblStringBodyTriple: [
+      [/"""/u, "string.escape", "@popall"],
+      ...ESCAPED_BRACE_RULES,
+      [/\{[^!':=}]+/u, "identifier", "@fStringDetail"],
+      [/\\./u, "string"],
+      [/\\$/u, "string"],
+      [/[^"\\{}]+/u, "string"],
+      [/["{}]/u, "string"],
+    ],
+    fStringBody: [...ESCAPED_BRACE_RULES, ...(language.tokenizer.fStringBody 
?? [])],
+    fStringBodyTriple: [
+      [/'''/u, "string.escape", "@popall"],
+      ...ESCAPED_BRACE_RULES,
+      [/\{[^!':=}]+/u, "identifier", "@fStringDetail"],
+      [/\\./u, "string"],
+      [/\\$/u, "string"],
+      [/[^'\\{}]+/u, "string"],
+      [/['{}]/u, "string"],
+    ],
+    // Triple-quote rules must precede the single-quote rules so `f"""` is not
+    // consumed as `f"` + `""`. `[Ff]` covers both `f"""` and `F"""`.
+    strings: [
+      [/'$/u, "string.escape", "@popall"],
+      [/[Ff]'''/u, "string.escape", "@fStringBodyTriple"],
+      [/[Ff]'/u, "string.escape", "@fStringBody"],
+      [/'/u, "string.escape", "@stringBody"],
+      [/"$/u, "string.escape", "@popall"],
+      [/[Ff]"""/u, "string.escape", "@fDblStringBodyTriple"],
+      [/[Ff]"/u, "string.escape", "@fDblStringBody"],
+      [/"/u, "string.escape", "@dblStringBody"],
+    ],
+  };
+
+  return { ...language, tokenizer };
+};
diff --git a/airflow-core/src/airflow/ui/src/vite-env.d.ts b/airflow-core/src/airflow/ui/src/vite-env.d.ts
index c7f6825d014..e520ef77aa6 100644
--- a/airflow-core/src/airflow/ui/src/vite-env.d.ts
+++ b/airflow-core/src/airflow/ui/src/vite-env.d.ts
@@ -29,3 +29,14 @@ interface ImportMeta {
 // no typings of their own.
 declare module "monaco-editor/esm/vs/editor/contrib/folding/browser/folding";
 declare module "monaco-editor/esm/vs/base/browser/ui/codicons/codiconStyles";
+
+// The Python basic-language module exports its Monarch grammar (`conf` / `language`)
+// but ships no `.d.ts` of its own.
+declare module "monaco-editor/esm/vs/basic-languages/python/python.js" {
+  // `import(...)` type syntax is required here: a top-level `import type` would turn this
+  // ambient declaration file into a module and break the `ImportMeta` augmentation above.
+  /* eslint-disable @typescript-eslint/consistent-type-imports */
+  export const conf: import("monaco-editor").languages.LanguageConfiguration;
+  export const language: import("monaco-editor").languages.IMonarchLanguage;
+  /* eslint-enable @typescript-eslint/consistent-type-imports */
+}

Reply via email to