This is an automated email from the ASF dual-hosted git repository.

kgabryje pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 54f19856de fix: HTML detection in tables (#37171)
54f19856de is described below

commit 54f19856de1eb278ea3bfdc96ad41421445b290c
Author: Kamil Gabryjelski <[email protected]>
AuthorDate: Fri Jan 16 15:39:32 2026 +0100

    fix: HTML detection in tables (#37171)
---
 .../superset-ui-core/src/utils/html.test.tsx       |  18 +++
 .../packages/superset-ui-core/src/utils/html.tsx   | 129 +++++++++++----------
 2 files changed, 85 insertions(+), 62 deletions(-)

diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
index 87cb0abb6e..6dd4e279f0 100644
--- a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
+++ b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
@@ -65,6 +65,24 @@ describe('isProbablyHTML', () => {
     expect(isProbablyHTML('if x < 5 and y > 10')).toBe(false);
     expect(isProbablyHTML('price < $100')).toBe(false);
   });
+
+  it('should return true for all known HTML tags', () => {
+    expect(isProbablyHTML('<section>Content</section>')).toBe(true);
+    expect(isProbablyHTML('<article>Content</article>')).toBe(true);
+    expect(isProbablyHTML('<nav>Content</nav>')).toBe(true);
+    expect(isProbablyHTML('<header>Content</header>')).toBe(true);
+    expect(isProbablyHTML('<footer>Content</footer>')).toBe(true);
+    expect(isProbablyHTML('<button>Click me</button>')).toBe(true);
+    expect(isProbablyHTML('<form>Content</form>')).toBe(true);
+    expect(isProbablyHTML('<input type="text">')).toBe(true);
+    expect(isProbablyHTML('<textarea>Content</textarea>')).toBe(true);
+    expect(isProbablyHTML('<select><option>1</option></select>')).toBe(true);
+    expect(isProbablyHTML('<blockquote>Quote</blockquote>')).toBe(true);
+    expect(isProbablyHTML('<video src="video.mp4"></video>')).toBe(true);
+    expect(isProbablyHTML('<audio src="audio.mp3"></audio>')).toBe(true);
+    expect(isProbablyHTML('<canvas></canvas>')).toBe(true);
+    expect(isProbablyHTML('<iframe src="page.html"></iframe>')).toBe(true);
+  });
 });
 
 describe('sanitizeHtmlIfNeeded', () => {
diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
index 3055108f0b..de7267c823 100644
--- a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
+++ b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
@@ -52,11 +52,73 @@ export function sanitizeHtml(htmlString: string) {
   return xssFilter.process(htmlString);
 }
 
-export function hasHtmlTagPattern(str: string): boolean {
-  const htmlTagPattern =
-    /<(html|head|body|div|span|a|p|h[1-6]|title|meta|link|script|style)/i;
+const KNOWN_HTML_TAGS = new Set([
+  'div',
+  'span',
+  'p',
+  'a',
+  'b',
+  'i',
+  'u',
+  'em',
+  'strong',
+  'h1',
+  'h2',
+  'h3',
+  'h4',
+  'h5',
+  'h6',
+  'table',
+  'tr',
+  'td',
+  'th',
+  'tbody',
+  'thead',
+  'tfoot',
+  'ul',
+  'ol',
+  'li',
+  'img',
+  'br',
+  'hr',
+  'pre',
+  'code',
+  'blockquote',
+  'section',
+  'article',
+  'nav',
+  'header',
+  'footer',
+  'form',
+  'input',
+  'button',
+  'select',
+  'option',
+  'textarea',
+  'label',
+  'fieldset',
+  'legend',
+  'video',
+  'audio',
+  'canvas',
+  'iframe',
+  'script',
+  'style',
+  'link',
+  'meta',
+  'title',
+  'html',
+  'head',
+  'body',
+]);
+
+const HTML_TAG_PATTERN = new RegExp(
+  `<(${Array.from(KNOWN_HTML_TAGS).join('|')})\\b`,
+  'i',
+);
 
-  return htmlTagPattern.test(str);
+export function hasHtmlTagPattern(str: string): boolean {
+  return HTML_TAG_PATTERN.test(str);
 }
 
 export function isProbablyHTML(text: string) {
@@ -91,64 +153,7 @@ export function isProbablyHTML(text: string) {
   // This prevents strings like "<abcdef:12345>" from being treated as HTML
   return elements.some(element => {
     const tagName = element.tagName.toLowerCase();
-    // List of common HTML tags we want to recognize
-    const knownHtmlTags = [
-      'div',
-      'span',
-      'p',
-      'a',
-      'b',
-      'i',
-      'u',
-      'em',
-      'strong',
-      'h1',
-      'h2',
-      'h3',
-      'h4',
-      'h5',
-      'h6',
-      'table',
-      'tr',
-      'td',
-      'th',
-      'tbody',
-      'thead',
-      'tfoot',
-      'ul',
-      'ol',
-      'li',
-      'img',
-      'br',
-      'hr',
-      'pre',
-      'code',
-      'blockquote',
-      'section',
-      'article',
-      'nav',
-      'header',
-      'footer',
-      'form',
-      'input',
-      'button',
-      'select',
-      'option',
-      'textarea',
-      'label',
-      'fieldset',
-      'legend',
-      'video',
-      'audio',
-      'canvas',
-      'iframe',
-      'script',
-      'style',
-      'link',
-      'meta',
-      'title',
-    ];
-    return knownHtmlTags.includes(tagName);
+    return KNOWN_HTML_TAGS.has(tagName);
   });
 }
 

Reply via email to