Nick Coghlan <ncogh...@gmail.com> added the comment:

I should probably update that posted recipe to my latest version (which adds 
"excluded_files" and "excluded_dirs" parameters).

However, since I've lately been dealing with remote filesystems where 
os.listdir() and os.stat() calls from the local machine aren't possible, I 
also think we may need to reconsider how this is structured and look at the 
idea of building a more effective pipeline model that permits more efficient 
modes of interaction.

Let's take 'os.walk' as the base primitive - the basis of the pipeline will 
always be an iterator that produces 3-tuples of a base name, a list of 
subdirectories and a list of files. The filtering pipeline elements will 
require that the underlying walk include "topdown=True" and pay attention to 
changes in the subdirectory list.


Then consider the following possible pipeline elements:

def filter_dirs(walk_iter, *include_filters, exclude_filters=()):
    """Prune the subdirectory lists produced by an os.walk-style iterator.

    A subdirectory name is kept only if it matches at least one pattern in
    *include_filters* and no pattern in *exclude_filters*.  With no include
    patterns at all, every subdirectory is removed.

    Each ``subdirs`` list is modified in place, so a top-down walk underneath
    this filter will not descend into the removed directories.

    Assumes ``fnmatch`` is the ``fnmatch.fnmatch`` function and is already
    in scope.

    Yields the same ``(dirpath, subdirs, files)`` tuples it receives.
    """
    def should_include(dirname):
        return any(fnmatch(dirname, include) for include in include_filters)

    def should_exclude(dirname):
        # Compare against the exclusion pattern currently being iterated.
        return any(fnmatch(dirname, exclude) for exclude in exclude_filters)

    for dirpath, subdirs, files in walk_iter:
        # Slice assignment keeps the list object the underlying walk holds.
        subdirs[:] = [subdir for subdir in subdirs
                      if should_include(subdir)
                      and not should_exclude(subdir)]
        yield dirpath, subdirs, files

def filter_files(walk_iter, *include_filters, exclude_filters=()):
    """Filter the file lists produced by an os.walk-style iterator.

    A file name is kept only if it matches at least one pattern in
    *include_filters* and no pattern in *exclude_filters*.  With no include
    patterns at all, every file is removed.

    Each ``files`` list is modified in place; ``subdirs`` is passed through
    untouched.

    Assumes ``fnmatch`` is the ``fnmatch.fnmatch`` function and is already
    in scope.

    Yields the same ``(dirpath, subdirs, files)`` tuples it receives.
    """
    def should_include(fname):
        return any(fnmatch(fname, include) for include in include_filters)

    def should_exclude(fname):
        # Compare against the exclusion pattern currently being iterated.
        return any(fnmatch(fname, exclude) for exclude in exclude_filters)

    for dirpath, subdirs, files in walk_iter:
        files[:] = [fname for fname in files
                    if should_include(fname) and not should_exclude(fname)]
        yield dirpath, subdirs, files

def limit_depth(walk_iter, depth):
    """Stop an os.walk-style iterator from descending below *depth* levels.

    Depth is measured by counting ``os.sep`` in each directory path, relative
    to the first path the underlying iterator produces.  A depth of 0 limits
    the walk to that first directory.

    Every entry is yielded before its ``subdirs`` list is emptied in place,
    so a consumer still sees the subdirectory names at the deepest level; a
    top-down walk underneath simply never descends into them.

    Raises ValueError (on first iteration, since this is a generator) if
    *depth* is negative.
    """
    if depth < 0:
        msg = "Depth limit must be at least 0 ({!r} provided)"
        raise ValueError(msg.format(depth))
    sep = os.sep
    # Both loops must share one iterator; a re-iterable input such as a list
    # would otherwise start over in the second loop and repeat the top entry.
    walk_iter = iter(walk_iter)
    for top, subdirs, files in walk_iter:
        yield top, subdirs, files
        # A trailing separator ("top/", or the root "/") would otherwise
        # count as an extra level and let the walk go one level too deep.
        initial_depth = top.rstrip(sep).count(sep)
        if depth == 0:
            subdirs[:] = []
        break
    for dirpath, subdirs, files in walk_iter:
        yield dirpath, subdirs, files
        current_depth = dirpath.rstrip(sep).count(sep) - initial_depth
        if current_depth >= depth:
            subdirs[:] = []

def detect_symlink_loops(walk_iter, onloop=None):
    """Skip symlinked directories that point back at one of their parents.

    The first entry from *walk_iter* is taken as the top of the walk.  For
    each later entry whose ``dirpath`` is a symbolic link, the link's real
    path is compared, component by component, with the path the link
    nominally occupies under the real top directory.  If every compared
    component matches, the link is treated as a loop and ``onloop(dirpath)``
    is called:

    * a false result (including ``None``) empties ``subdirs`` in place and
      the entry is not yielded;
    * a true result lets the entry through unchanged.

    The default *onloop* writes a message to ``sys.stderr`` and returns
    ``None``, so loops are reported and skipped.

    Yields ``(dirpath, subdirs, files)`` tuples.
    """
    if onloop is None:
        def onloop(path):
            msg = "Symlink {!r} refers to a parent directory, skipping\n"
            sys.stderr.write(msg.format(path))
            sys.stderr.flush()
    sep = os.sep
    # Both loops must share one iterator; a re-iterable input such as a list
    # would otherwise start over in the second loop and repeat the top entry.
    walk_iter = iter(walk_iter)
    for top, subdirs, files in walk_iter:
        yield top, subdirs, files
        real_top = os.path.abspath(os.path.realpath(top))
        break
    for dirpath, subdirs, files in walk_iter:
        if os.path.islink(dirpath):
            # We just descended into a directory via a symbolic link.
            # Check whether it refers to a directory that is a parent of
            # the location the link nominally occupies.
            relative = os.path.relpath(dirpath, top)
            nominal_path = os.path.join(real_top, relative)
            real_path = os.path.abspath(os.path.realpath(dirpath))
            # zip() stops at the shorter path, so this is a prefix test.
            path_fragments = zip(nominal_path.split(sep),
                                 real_path.split(sep))
            if all(nominal == real for nominal, real in path_fragments):
                if not onloop(dirpath):
                    subdirs[:] = []
                    continue
        yield dirpath, subdirs, files

And pipeline terminators:

def walk_dirs(walk_iter):
    """Yield only the directory path of each (dirpath, subdirs, files) entry."""
    yield from (path for path, _subdirs, _files in walk_iter)

def walk_files(walk_iter):
    """Yield the joined path of every file in each walked directory.

    Directory paths themselves are not produced.
    """
    for base, _subdirs, names in walk_iter:
        yield from (os.path.join(base, name) for name in names)

def walk_all(walk_iter):
    """Yield each directory path, then the joined path of each of its files."""
    for base, _subdirs, names in walk_iter:
        yield base
        yield from (os.path.join(base, name) for name in names)

The pipeline terminators could then be combined with ordinary iterable 
consumers like comprehensions:

    # followlinks is an os.walk() option, so it goes on the walk call rather
    # than on os.path.abspath().
    base_walk = detect_symlink_loops(
        os.walk(os.path.abspath(base_dir), followlinks=True))
    depth_limited_walk = limit_depth(base_walk, 2)
    filtered_walk = filter_dirs(
        filter_files(depth_limited_walk, "*.py"), "*.pyp")
    # Map every path the pipeline yields to its stat result.
    tree_info = {path: os.stat(path) for path in walk_all(filtered_walk)}

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue13229>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to