This is an automated email from the ASF dual-hosted git repository.
kgabryje pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 54f19856de fix: HTML detection in tables (#37171)
54f19856de is described below
commit 54f19856de1eb278ea3bfdc96ad41421445b290c
Author: Kamil Gabryjelski <[email protected]>
AuthorDate: Fri Jan 16 15:39:32 2026 +0100
fix: HTML detection in tables (#37171)
---
.../superset-ui-core/src/utils/html.test.tsx | 18 +++
.../packages/superset-ui-core/src/utils/html.tsx | 129 +++++++++++----------
2 files changed, 85 insertions(+), 62 deletions(-)
diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
index 87cb0abb6e..6dd4e279f0 100644
--- a/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
+++ b/superset-frontend/packages/superset-ui-core/src/utils/html.test.tsx
@@ -65,6 +65,24 @@ describe('isProbablyHTML', () => {
expect(isProbablyHTML('if x < 5 and y > 10')).toBe(false);
expect(isProbablyHTML('price < $100')).toBe(false);
});
+
+ it('should return true for all known HTML tags', () => {
+ expect(isProbablyHTML('<section>Content</section>')).toBe(true);
+ expect(isProbablyHTML('<article>Content</article>')).toBe(true);
+ expect(isProbablyHTML('<nav>Content</nav>')).toBe(true);
+ expect(isProbablyHTML('<header>Content</header>')).toBe(true);
+ expect(isProbablyHTML('<footer>Content</footer>')).toBe(true);
+ expect(isProbablyHTML('<button>Click me</button>')).toBe(true);
+ expect(isProbablyHTML('<form>Content</form>')).toBe(true);
+ expect(isProbablyHTML('<input type="text">')).toBe(true);
+ expect(isProbablyHTML('<textarea>Content</textarea>')).toBe(true);
+ expect(isProbablyHTML('<select><option>1</option></select>')).toBe(true);
+ expect(isProbablyHTML('<blockquote>Quote</blockquote>')).toBe(true);
+ expect(isProbablyHTML('<video src="video.mp4"></video>')).toBe(true);
+ expect(isProbablyHTML('<audio src="audio.mp3"></audio>')).toBe(true);
+ expect(isProbablyHTML('<canvas></canvas>')).toBe(true);
+ expect(isProbablyHTML('<iframe src="page.html"></iframe>')).toBe(true);
+ });
});
describe('sanitizeHtmlIfNeeded', () => {
diff --git a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
index 3055108f0b..de7267c823 100644
--- a/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
+++ b/superset-frontend/packages/superset-ui-core/src/utils/html.tsx
@@ -52,11 +52,73 @@ export function sanitizeHtml(htmlString: string) {
return xssFilter.process(htmlString);
}
-export function hasHtmlTagPattern(str: string): boolean {
- const htmlTagPattern =
- /<(html|head|body|div|span|a|p|h[1-6]|title|meta|link|script|style)/i;
+const KNOWN_HTML_TAGS = new Set([
+ 'div',
+ 'span',
+ 'p',
+ 'a',
+ 'b',
+ 'i',
+ 'u',
+ 'em',
+ 'strong',
+ 'h1',
+ 'h2',
+ 'h3',
+ 'h4',
+ 'h5',
+ 'h6',
+ 'table',
+ 'tr',
+ 'td',
+ 'th',
+ 'tbody',
+ 'thead',
+ 'tfoot',
+ 'ul',
+ 'ol',
+ 'li',
+ 'img',
+ 'br',
+ 'hr',
+ 'pre',
+ 'code',
+ 'blockquote',
+ 'section',
+ 'article',
+ 'nav',
+ 'header',
+ 'footer',
+ 'form',
+ 'input',
+ 'button',
+ 'select',
+ 'option',
+ 'textarea',
+ 'label',
+ 'fieldset',
+ 'legend',
+ 'video',
+ 'audio',
+ 'canvas',
+ 'iframe',
+ 'script',
+ 'style',
+ 'link',
+ 'meta',
+ 'title',
+ 'html',
+ 'head',
+ 'body',
+]);
+
+const HTML_TAG_PATTERN = new RegExp(
+ `<(${Array.from(KNOWN_HTML_TAGS).join('|')})\\b`,
+ 'i',
+);
- return htmlTagPattern.test(str);
+export function hasHtmlTagPattern(str: string): boolean {
+ return HTML_TAG_PATTERN.test(str);
}
export function isProbablyHTML(text: string) {
@@ -91,64 +153,7 @@ export function isProbablyHTML(text: string) {
// This prevents strings like "<abcdef:12345>" from being treated as HTML
return elements.some(element => {
const tagName = element.tagName.toLowerCase();
- // List of common HTML tags we want to recognize
- const knownHtmlTags = [
- 'div',
- 'span',
- 'p',
- 'a',
- 'b',
- 'i',
- 'u',
- 'em',
- 'strong',
- 'h1',
- 'h2',
- 'h3',
- 'h4',
- 'h5',
- 'h6',
- 'table',
- 'tr',
- 'td',
- 'th',
- 'tbody',
- 'thead',
- 'tfoot',
- 'ul',
- 'ol',
- 'li',
- 'img',
- 'br',
- 'hr',
- 'pre',
- 'code',
- 'blockquote',
- 'section',
- 'article',
- 'nav',
- 'header',
- 'footer',
- 'form',
- 'input',
- 'button',
- 'select',
- 'option',
- 'textarea',
- 'label',
- 'fieldset',
- 'legend',
- 'video',
- 'audio',
- 'canvas',
- 'iframe',
- 'script',
- 'style',
- 'link',
- 'meta',
- 'title',
- ];
- return knownHtmlTags.includes(tagName);
+ return KNOWN_HTML_TAGS.has(tagName);
});
}