https://github.com/python/cpython/commit/ce01ab536f22a3cf095d621f3b3579c1e3567859
commit: ce01ab536f22a3cf095d621f3b3579c1e3567859
branch: main
author: Sam Gross <colesb...@gmail.com>
committer: serhiy-storchaka <storch...@gmail.com>
date: 2024-01-23T20:14:46Z
summary:

gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269)

The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.

files:
A Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
M Lib/xml/etree/ElementTree.py

diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 42574eefd81beb..ae6575028be11c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@
 import collections
 import collections.abc
 import contextlib
+import weakref
 
 from . import ElementPath
 
@@ -1223,13 +1224,14 @@ def iterparse(source, events=None, parser=None):
     # parser argument of iterparse is removed, this can be killed.
     pullparser = XMLPullParser(events=events, _parser=parser)
 
-    def iterator(source):
+    if not hasattr(source, "read"):
+        source = open(source, "rb")
+        close_source = True
+    else:
         close_source = False
+
+    def iterator(source):
         try:
-            if not hasattr(source, "read"):
-                source = open(source, "rb")
-                close_source = True
-            yield None
             while True:
                 yield from pullparser.read_events()
                 # load event buffer
@@ -1239,18 +1241,23 @@ def iterator(source):
                 pullparser.feed(data)
             root = pullparser._close_and_return_root()
             yield from pullparser.read_events()
-            it.root = root
+            it = wr()
+            if it is not None:
+                it.root = root
         finally:
             if close_source:
                 source.close()
 
     class IterParseIterator(collections.abc.Iterator):
         __next__ = iterator(source).__next__
-    it = IterParseIterator()
-    it.root = None
-    del iterator, IterParseIterator
 
-    next(it)
+        def __del__(self):
+            if close_source:
+                source.close()
+
+    it = IterParseIterator()
+    wr = weakref.ref(it)
+    del IterParseIterator
     return it
 
 
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
new file mode 100644
index 00000000000000..9b69b5deb1b5a0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to