-
-
Notifications
You must be signed in to change notification settings - Fork 32.4k
gh-89727: Fix os.walk to handle late editing of dirs #100703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
2767f70
04a16a2
6b0465f
2ff800f
072bdfb
41d7463
d81ff45
431d47a
2829a7d
b59358b
839f31e
1011e19
80b7e82
70cb0c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -302,7 +302,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False): | |
subdirectories (directories are generated bottom up). | ||
|
||
When topdown is true, the caller can modify the dirnames list in-place | ||
(e.g., via del or slice assignment), and walk will only recurse into the | ||
(e.g., via del or slice assignment), and walk will only traverse into the | ||
subdirectories whose names remain in dirnames; this can be used to prune the | ||
search, or to impose a specific order of visiting. Modifying dirnames when | ||
topdown is false has no effect on the behavior of os.walk(), since the | ||
|
@@ -341,31 +341,27 @@ def walk(top, topdown=True, onerror=None, followlinks=False): | |
""" | ||
sys.audit("os.walk", top, topdown, onerror, followlinks) | ||
|
||
stack = [fspath(top)] | ||
islink, join = path.islink, path.join | ||
while stack: | ||
top = stack.pop() | ||
if isinstance(top, tuple): | ||
yield top | ||
continue | ||
top = fspath(top) | ||
try: | ||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
return | ||
|
||
stack = [] | ||
islink, join = path.islink, path.join | ||
while True: | ||
dirs = [] | ||
nondirs = [] | ||
walk_dirs = [] | ||
use_entry = True | ||
|
||
# We may not have read permission for top, in which case we can't | ||
# get a list of the files the directory contains. | ||
# We suppress the exception here, rather than blow up for a | ||
# minor reason when (say) a thousand readable directories are still | ||
# left to visit. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment belongs with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. It can probably just go on the first instance of this block There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually in the latest commit where this block appears in separate functions, I decided to just remove this comment. The behavior is described in the docstring and I think the try-except block is pretty clear. Happy to add it or part of it back if deemed necessary |
||
try: | ||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
continue | ||
|
||
cont = False | ||
with scandir_it: | ||
while True: | ||
try: | ||
|
@@ -376,7 +372,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False): | |
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
cont = True | ||
use_entry = False | ||
break | ||
|
||
try: | ||
|
@@ -408,27 +404,61 @@ def walk(top, topdown=True, onerror=None, followlinks=False): | |
|
||
if walk_into: | ||
walk_dirs.append(entry.path) | ||
if cont: | ||
continue | ||
|
||
if topdown: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Such a high percentage of code in this function is now under either There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was actually thinking about this. I just gave it a shot to see. This seems to me like one of those cases where there are enough little differences in logic that it's much cleaner to separate the two. If we didn't support dir modification for topdown or didn't care about performance it might be simpler, but when you have a lot of the same logic interspersed by a lot of little differences it gets messy. In these kinds of situations I also tend to prefer some duplication between functions over one big function with conditions inside of loops. I also find it easier to look at the two sets of logic separately, but either way works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think the split version seems fine. Will wait a bit and see if some core devs have opinions. |
||
# Yield before sub-directory traversal if going top down | ||
yield top, dirs, nondirs | ||
# Traverse into sub-directories | ||
for dirname in reversed(dirs): | ||
new_path = join(top, dirname) | ||
# bpo-23605: os.path.islink() is used instead of caching | ||
# entry.is_symlink() result during the loop on os.scandir() because | ||
# the caller can replace the directory entry during the "yield" | ||
# above. | ||
if followlinks or not islink(new_path): | ||
stack.append(new_path) | ||
if use_entry: | ||
# Yield before sub-directory traversal if going top down | ||
yield top, dirs, nondirs | ||
# Append dir names to walk along with top, their parent dir | ||
stack.append([top, iter(dirs)]) | ||
|
||
# Set top and scandir_it for the next iteration | ||
while stack: | ||
root, dirs = stack[-1] | ||
for dirname in dirs: | ||
top = join(root, dirname) | ||
# bpo-23605: os.path.islink() is used instead of caching | ||
# entry.is_symlink() result during the loop on os.scandir() because | ||
# the caller can replace the directory entry during the previous | ||
# "yield" | ||
if followlinks or not islink(top): | ||
try: | ||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
else: | ||
break | ||
else: | ||
stack.pop() | ||
continue | ||
break | ||
else: | ||
return | ||
else: | ||
# Yield after sub-directory traversal if going bottom up | ||
stack.append((top, dirs, nondirs)) | ||
# Traverse into sub-directories | ||
for new_path in reversed(walk_dirs): | ||
stack.append(new_path) | ||
if use_entry: | ||
# Traverse into sub-directories by appending a value | ||
# (entry, dirs) where dirs will be walked and then | ||
# when dirs is exhausted entry will be yielded | ||
stack.append(((top, dirs, nondirs), iter(walk_dirs))) | ||
|
||
# Set top and scandir_it for the next iteration | ||
while stack: | ||
entry, dirs = stack[-1] | ||
for top in dirs: | ||
try: | ||
scandir_it = scandir(top) | ||
except OSError as error: | ||
if onerror is not None: | ||
onerror(error) | ||
else: | ||
break | ||
else: | ||
stack.pop() | ||
yield entry | ||
continue | ||
break | ||
else: | ||
return | ||
|
||
__all__.append("walk") | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.