https://github.com/python/cpython/commit/e76ff560b015a585e185d9a21d3c18540c6b5fcd
commit: e76ff560b015a585e185d9a21d3c18540c6b5fcd
branch: 3.13
author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com>
committer: serhiy-storchaka <storch...@gmail.com>
date: 2025-05-10T17:58:29Z
summary:

[3.13] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser 
(GH-22658) (GH-133845)

When calling .close() the HTMLParser should flush all remaining content,
even when that content is in an unclosed script or style tag.
(cherry picked from commit 53383e90e4df7029f792b7aa81aa2e4cff348ed0)

Co-authored-by: Waylan Limberg <waylan.limb...@icloud.com>

files:
A Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
M Lib/html/parser.py
M Lib/test/test_htmlparser.py

diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 1b8b6ea0e5ab7a..1e30956fe24f83 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -260,7 +260,7 @@ def goahead(self, end):
             else:
                 assert 0, "interesting.search() lied"
         # end while
-        if end and i < n and not self.cdata_elem:
+        if end and i < n:
             if self.convert_charrefs and not self.cdata_elem:
                 self.handle_data(unescape(rawdata[i:n]))
             else:
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 68649e9d6d5e9c..61fa24fab574f2 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -317,6 +317,16 @@ def get_events(self):
                                 ("endtag", element_lower)],
                             collector=Collector(convert_charrefs=False))
 
+    def test_EOF_in_cdata(self):
+        content = """<!-- not a comment --> &not-an-entity-ref;
+                  <a href="" /> </p><p> <span></span></style>
+                  '</script' + '>'"""
+        s = f'<script>{content}'
+        self._run_check(s, [
+            ("starttag", 'script', []),
+            ("data", content)
+        ])
+
     def test_comments(self):
         html = ("<!-- I'm a valid comment -->"
                 '<!--me too!-->'
diff --git 
a/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst 
b/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
new file mode 100644
index 00000000000000..bb85481b229697
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
@@ -0,0 +1,2 @@
+:meth:`html.parser.HTMLParser.close` no longer loses data when the
+``<script>`` tag is not closed. Patch by Waylan Limberg.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to