https://github.com/python/cpython/commit/1e610fb05fa4ba61a759b68461f1a9aed07622fc
commit: 1e610fb05fa4ba61a759b68461f1a9aed07622fc
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-01-20T03:06:00Z
summary:
GH-113225: Speed up `pathlib.Path.walk(top_down=False)` (#113693)
Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve
path objects for directories to visit. This saves the allocation of one
path object per directory in user subclasses of `PathBase`, and avoids a
second loop.
This trick does not apply when walking top-down, because users can affect
the walk by modifying *dirnames* in-place.
A side effect of this change is that, in bottom-up mode, the subdirectories
of each directory are visited in reverse order, so the visiting order no
longer matches the order of the names in *dirnames*. I suspect this is fine,
as the order is arbitrary anyway.
files:
A Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst
M Lib/pathlib/_abc.py
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index e5eeb4afce2ea9..553e1a399061d3 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -820,6 +820,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
with scandir_obj as scandir_it:
dirnames = []
filenames = []
+ if not top_down:
+ paths.append((path, dirnames, filenames))
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
@@ -828,16 +830,15 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
is_dir = False
if is_dir:
+ if not top_down:
+ paths.append(path._make_child_entry(entry))
dirnames.append(entry.name)
else:
filenames.append(entry.name)
if top_down:
yield path, dirnames, filenames
- else:
- paths.append((path, dirnames, filenames))
-
- paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
+ paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
def absolute(self):
"""Return an absolute version of this path
diff --git a/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst
new file mode 100644
index 00000000000000..0c07f42fd065d2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst
@@ -0,0 +1,2 @@
+Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where
+possible.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]