On Fri, Oct 14, 2016 at 12:02 AM, Maciej Fijalkowski <fij...@gmail.com>
wrote:

> # HG changeset patch
> # User Maciej Fijalkowski <fij...@gmail.com>
> # Date 1476428549 -7200
> #      Fri Oct 14 09:02:29 2016 +0200
> # Node ID 4e80a66124279e235dec7ae8f58c0a14b0b137bf
> # Parent  c770219dc4c253d7cd82519ce3c74438bb2829d3
> osutil: implement listdir for linux
>
> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py
> +++ b/mercurial/pure/osutil.py
> @@ -66,13 +66,20 @@
>      return result
>
>  ffi = None
> -if modulepolicy not in policynocffi and sys.platform == 'darwin':
> +if modulepolicy not in policynocffi and (
> +        sys.platform == 'darwin' or sys.platform.startswith('linux')):
>      try:
>          from _osutil_cffi import ffi, lib
>      except ImportError:
>          if modulepolicy == 'cffi': # strict cffi import
>              raise
>
> +class stat_res(object):
> +    def __init__(self, st_mode, st_mtime, st_size):
> +        self.st_mode = st_mode
> +        self.st_mtime = st_mtime
> +        self.st_size = st_size
> +
>  if sys.platform == 'darwin' and ffi is not None:
>      listdir_batch_size = 4096
>      # tweakable number, only affects performance, which chunks
> @@ -88,12 +95,6 @@
>      attrkinds[lib.VFIFO] = statmod.S_IFIFO
>      attrkinds[lib.VSOCK] = statmod.S_IFSOCK
>
> -    class stat_res(object):
> -        def __init__(self, st_mode, st_mtime, st_size):
> -            self.st_mode = st_mode
> -            self.st_mtime = st_mtime
> -            self.st_size = st_size
> -
>      tv_sec_ofs = ffi.offsetof("struct timespec", "tv_sec")
>      buf = ffi.new("char[]", listdir_batch_size)
>
> @@ -117,7 +118,7 @@
>                  tp = attrkinds[cur.obj_type]
>                  if name == "." or name == "..":
>                      continue
> -                if skip == name and tp == statmod.S_ISDIR:
> +                if skip == name and tp == statmod.S_IFDIR:
>                      return []
>                  if stat:
>                      mtime = cur.mtime.tv_sec
> @@ -146,12 +147,74 @@
>          try:
>              ret = listdirinternal(dfd, req, stat, skip)
>          finally:
> +            lib.close(dfd)
> +        return ret
> +
> +elif sys.platform.startswith('linux') and ffi is not None:
> +
> +    _known_common_file_types = {
> +        lib.DT_DIR:  statmod.S_IFDIR,
> +        lib.DT_CHR:  statmod.S_IFCHR,
> +        lib.DT_BLK:  statmod.S_IFBLK,
> +        lib.DT_REG:  statmod.S_IFREG,
> +        lib.DT_FIFO: statmod.S_IFIFO,
> +        lib.DT_LNK:  statmod.S_IFLNK,
> +        lib.DT_SOCK: statmod.S_IFSOCK,
> +    }
> +
> +    def listdirinternal(dir, dirfd, stat, skip):
> +        buf = ffi.new("struct stat *")
> +        ret = []
> +        while True:
> +            ffi.errno = 0
> +            dirent = lib.readdir(dir)
> +            if not dirent:
> +                break
> +            dname = dirent.d_name
> +            if dname[0] == "." and (dname[1] == "\x00" or
> +                    (dname[1] == "." and dname[2] == "\x00")):
> +                continue
> +            #
> +            dtype = dirent.d_type
> +            if stat or dtype not in _known_common_file_types:
> +                if lib.fstatat(dirfd, dirent.d_name, buf,
> +                               lib.AT_SYMLINK_NOFOLLOW) != 0:
> +                    raise OSError(ffi.errno, os.strerror(ffi.errno))
> +                tp = statmod.S_IFMT(buf.st_mode)
> +            else:
> +                tp = _known_common_file_types[dtype]
> +            #
> +            name = ffi.string(dirent.d_name)
> +            if skip == name and tp == statmod.S_IFDIR:
> +                return []
> +            if stat:
> +                ret.append((name, tp, stat_res(
> +                               st_mode  = buf.st_mode,
> +                               st_mtime = buf.st_mtim.tv_sec,
> +                               st_size  = buf.st_size)))
> +            else:
> +                ret.append((name, tp))
> +
> +        if ffi.errno != 0:
> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
> +        return ret
> +
> +    def listdir(path, stat=False, skip=None):
> +        dfd = os.open(path, os.O_RDONLY)
> +        dir = lib.fdopendir(dfd)
> +        if dir == ffi.NULL:
>              try:
> -                lib.close(dfd)
> +                os.close(dfd)
>              except BaseException:
> -                pass # we ignore all the errors from closing, not
> -                # much we can do about that
> +                pass
> +            raise OSError(ffi.errno, os.strerror(ffi.errno))
> +
> +        try:
> +            ret = listdirinternal(dir, dfd, stat, skip)
> +        finally:
> +            lib.closedir(dir)
>          return ret
> +
>  else:
>      listdir = listdirpure
>
> diff --git a/setup.py b/setup.py
> --- a/setup.py
> +++ b/setup.py
> @@ -323,7 +323,7 @@
>              exts = [setup_mpatch_cffi.ffi.distutils_extension(),
>                      setup_bdiff_cffi.ffi.distutils_extension()]
>              # cffi modules go here
> -            if sys.platform == 'darwin':
> +            if sys.platform == 'darwin' or sys.platform.startswith('
> linux'):
>                  import setup_osutil_cffi
>                  exts.append(setup_osutil_cffi.ffi.distutils_extension())
>              self.distribution.ext_modules = exts
> diff --git a/setup_osutil_cffi.py b/setup_osutil_cffi.py
> --- a/setup_osutil_cffi.py
> +++ b/setup_osutil_cffi.py
> @@ -1,102 +1,159 @@
>  from __future__ import absolute_import
>
>  import cffi
> +import sys
>
> -ffi = cffi.FFI()
> -ffi.set_source("_osutil_cffi", """
> -#include <sys/attr.h>
> -#include <sys/vnode.h>
> -#include <unistd.h>
> -#include <fcntl.h>
> -#include <time.h>
> +if sys.platform == "darwin":
> +    ffi = cffi.FFI()
> +    ffi.set_source("_osutil_cffi", """
> +    #include <sys/attr.h>
> +    #include <sys/vnode.h>
> +    #include <unistd.h>
> +    #include <fcntl.h>
> +    #include <time.h>
>
> -typedef struct val_attrs {
> -    uint32_t          length;
> -    attribute_set_t   returned;
> -    attrreference_t   name_info;
> -    fsobj_type_t      obj_type;
> -    struct timespec   mtime;
> -    uint32_t          accessmask;
> -    off_t             datalength;
> -} __attribute__((aligned(4), packed)) val_attrs_t;
> -""", include_dirs=['mercurial'])
> -ffi.cdef('''
> +    typedef struct val_attrs {
> +        uint32_t          length;
> +        attribute_set_t   returned;
> +        attrreference_t   name_info;
> +        fsobj_type_t      obj_type;
> +        struct timespec   mtime;
> +        uint32_t          accessmask;
> +        off_t             datalength;
> +    } __attribute__((aligned(4), packed)) val_attrs_t;
> +    """, include_dirs=['mercurial'])
> +    ffi.cdef('''
>
> -typedef uint32_t attrgroup_t;
> +    typedef uint32_t attrgroup_t;
>
> -typedef struct attrlist {
> -    uint16_t     bitmapcount; /* number of attr. bit sets in list */
> -    uint16_t   reserved;    /* (to maintain 4-byte alignment) */
> -    attrgroup_t commonattr;  /* common attribute group */
> -    attrgroup_t volattr;     /* volume attribute group */
> -    attrgroup_t dirattr;     /* directory attribute group */
> -    attrgroup_t fileattr;    /* file attribute group */
> -    attrgroup_t forkattr;    /* fork attribute group */
> -    ...;
> -};
> +    typedef struct attrlist {
> +        uint16_t     bitmapcount; /* number of attr. bit sets in list */
> +        uint16_t   reserved;    /* (to maintain 4-byte alignment) */
> +        attrgroup_t commonattr;  /* common attribute group */
> +        attrgroup_t volattr;     /* volume attribute group */
> +        attrgroup_t dirattr;     /* directory attribute group */
> +        attrgroup_t fileattr;    /* file attribute group */
> +        attrgroup_t forkattr;    /* fork attribute group */
> +        ...;
> +    };
>
> -typedef struct attribute_set {
> -    ...;
> -} attribute_set_t;
> +    typedef struct attribute_set {
> +        ...;
> +    } attribute_set_t;
>
> -typedef struct attrreference {
> -    int attr_dataoffset;
> -    int attr_length;
> -    ...;
> -} attrreference_t;
> +    typedef struct attrreference {
> +        int attr_dataoffset;
> +        int attr_length;
> +        ...;
> +    } attrreference_t;
>
> -typedef int ... off_t;
> +    typedef int ... off_t;
>
> -typedef struct val_attrs {
> -    uint32_t          length;
> -    attribute_set_t   returned;
> -    attrreference_t   name_info;
> -    uint32_t          obj_type;
> -    struct timespec   mtime;
> -    uint32_t          accessmask;
> -    off_t             datalength;
> -    ...;
> -} val_attrs_t;
> +    typedef struct val_attrs {
> +        uint32_t          length;
> +        attribute_set_t   returned;
> +        attrreference_t   name_info;
> +        uint32_t          obj_type;
> +        struct timespec   mtime;
> +        uint32_t          accessmask;
> +        off_t             datalength;
> +        ...;
> +    } val_attrs_t;
>
> -/* the exact layout of the above struct will be figured out during build
> time */
> +    /* the exact layout of the above struct will be figured out during
> +       build time */
>
> -typedef int ... time_t;
> +    typedef int ... time_t;
>
> -typedef struct timespec {
> -    time_t tv_sec;
> -    ...;
> -};
> +    typedef struct timespec {
> +        time_t tv_sec;
> +        ...;
> +    };
>
> -int getattrlist(const char* path, struct attrlist * attrList, void *
> attrBuf,
> -                size_t attrBufSize, unsigned int options);
> +    int getattrlist(const char* path, struct attrlist * attrList,
> +                    void * attrBuf, size_t attrBufSize, unsigned int
> options);
>
> -int getattrlistbulk(int dirfd, struct attrlist * attrList, void * attrBuf,
> -                    size_t attrBufSize, uint64_t options);
> +    int getattrlistbulk(int dirfd, struct attrlist * attrList, void *
> attrBuf,
> +                        size_t attrBufSize, uint64_t options);
>
> -#define ATTR_BIT_MAP_COUNT ...
> -#define ATTR_CMN_NAME ...
> -#define ATTR_CMN_OBJTYPE ...
> -#define ATTR_CMN_MODTIME ...
> -#define ATTR_CMN_ACCESSMASK ...
> -#define ATTR_CMN_ERROR ...
> -#define ATTR_CMN_RETURNED_ATTRS ...
> -#define ATTR_FILE_DATALENGTH ...
> +    #define ATTR_BIT_MAP_COUNT ...
> +    #define ATTR_CMN_NAME ...
> +    #define ATTR_CMN_OBJTYPE ...
> +    #define ATTR_CMN_MODTIME ...
> +    #define ATTR_CMN_ACCESSMASK ...
> +    #define ATTR_CMN_ERROR ...
> +    #define ATTR_CMN_RETURNED_ATTRS ...
> +    #define ATTR_FILE_DATALENGTH ...
>
> -#define VREG ...
> -#define VDIR ...
> -#define VLNK ...
> -#define VBLK ...
> -#define VCHR ...
> -#define VFIFO ...
> -#define VSOCK ...
> +    #define VREG ...
> +    #define VDIR ...
> +    #define VLNK ...
> +    #define VBLK ...
> +    #define VCHR ...
> +    #define VFIFO ...
> +    #define VSOCK ...
>
> -#define S_IFMT ...
> +    #define S_IFMT ...
>
> -int open(const char *path, int oflag, int perm);
> -int close(int);
> +    int open(const char *path, int oflag, int perm);
> +    int close(int);
>
> -#define O_RDONLY ...
> -''')
> +    #define O_RDONLY ...
> +    ''')
> +
> +else:
> +
> +    ffi = cffi.FFI()
> +
> +    ffi.set_source("_osutil_cffi", """
> +    #define _GNU_SOURCE 1
> +    #include <sys/stat.h>
> +    #include <sys/types.h>
> +    #include <dirent.h>
> +    #include <fcntl.h>
> +    """)
> +
> +    ffi.cdef("""
> +    typedef ... DIR;
> +
> +    struct dirent {
> +        unsigned char  d_type;      /* Type of file; not supported
> +                                       by all filesystem types */
> +        char           d_name[...]; /* Null-terminated filename */
> +        ...;
> +    };
> +
> +    typedef int... mode_t;
> +    typedef int... off_t;
> +    typedef int... time_t;
> +
> +    struct timespec {
> +        time_t tv_sec;
> +        ...;
> +    };
> +
> +    struct stat {
> +        mode_t   st_mode;   /* inode protection mode */
> +        struct timespec st_mtim;  /* time of last data modification */
> +        off_t    st_size;   /* file size, in bytes */
> +        ...;
> +    };
> +
> +    DIR *fdopendir(int fd);
> +    struct dirent *readdir(DIR *dirp);
> +    int closedir(DIR *dirp);
> +
> +    #define AT_SYMLINK_NOFOLLOW ...
> +    #define DT_DIR ...
> +    #define DT_CHR ...
> +    #define DT_BLK ...
> +    #define DT_REG ...
> +    #define DT_FIFO ...
> +    #define DT_LNK ...
> +    #define DT_SOCK ...
> +
> +    int fstatat(int fd, const char *path, struct stat *buf, int flag);
> +    """)
>
>  if __name__ == '__main__':
>      ffi.compile()
>

Few questions.

First, why is this needed? I assume performance. But it isn't clear to me
what the benefit is.

Second, do you think we should import a backport of scandir from Python
3.5? I think that's essentially the same as what our custom listdir code is
doing. I'd rather we take a snapshot of upstream code, modify it to work on
Python 2.6/2.7, then call it a day until we drop Python 2 support in
several years.
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to