https://github.com/python/cpython/commit/e76ff560b015a585e185d9a21d3c18540c6b5fcd commit: e76ff560b015a585e185d9a21d3c18540c6b5fcd branch: 3.13 author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com> committer: serhiy-storchaka <storch...@gmail.com> date: 2025-05-10T17:58:29Z summary:
[3.13] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser (GH-22658) (GH-133845) When calling .close() the HTMLParser should flush all remaining content, even when that content is in an unclosed script or style tag. (cherry picked from commit 53383e90e4df7029f792b7aa81aa2e4cff348ed0) Co-authored-by: Waylan Limberg <waylan.limb...@icloud.com> files: A Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst M Lib/html/parser.py M Lib/test/test_htmlparser.py diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 1b8b6ea0e5ab7a..1e30956fe24f83 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -260,7 +260,7 @@ def goahead(self, end): else: assert 0, "interesting.search() lied" # end while - if end and i < n and not self.cdata_elem: + if end and i < n: if self.convert_charrefs and not self.cdata_elem: self.handle_data(unescape(rawdata[i:n])) else: diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 68649e9d6d5e9c..61fa24fab574f2 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -317,6 +317,16 @@ def get_events(self): ("endtag", element_lower)], collector=Collector(convert_charrefs=False)) + def test_EOF_in_cdata(self): + content = """<!-- not a comment --> &not-an-entity-ref; + <a href="" /> </p><p> <span></span></style> + '</script' + '>'""" + s = f'<script>{content}' + self._run_check(s, [ + ("starttag", 'script', []), + ("data", content) + ]) + def test_comments(self): html = ("<!-- I'm a valid comment -->" '<!--me too!-->' diff --git a/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst b/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst new file mode 100644 index 00000000000000..bb85481b229697 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst @@ -0,0 +1,2 @@ +:meth:`html.parser.HTMLParser.close` no longer loses data when the +``<script>`` tag is not closed. Patch by Waylan Limberg. 
_______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com