Re: [PATCH 2/3] ls: use statx for loop detection if it's available
On Fri, 2019-09-13 at 17:31 +0100, Pádraig Brady wrote: > On 13/09/19 11:08, Jeff Layton wrote: > > On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote: > > > Should this have a runtime fallback to stat() if statx() is not > > > implemented > > > on the running kernel, or is that already handled at another level? > > > > > > > glibc already does this fallback. > > Note we need to be portable to a lot more than glibc > To my knowledge, only glibc has a statx wrapper so far, so others should just end up using the non-statx-enabled codepaths. I'd argue that a libc implementation that provided a statx that didn't fall back when run on an legacy kernel could be considered broken. > In general this patchset looks really useful, > and the performance testing on ceph is really appreciated. Yeah, it's always nice to see when these sorts of patches make a marked difference. While I didn't measure it, I suspect that the "ls" commands themselves also ran faster in this test, since they wouldn't have had to make as many round trips to the MDS. Note that I expect this set may also help on NFS, and Andreas said this should help Lustre too. Not sure about other netfs' yet, but I wouldn't expect this to perform any worse than the stat() variant does anywhere. Thanks, -- Jeff Layton
Re: [PATCH 2/3] ls: use statx for loop detection if it's available
On 13/09/19 11:08, Jeff Layton wrote: > On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote: >> Should this have a runtime fallback to stat() if statx() is not implemented >> on the running kernel, or is that already handled at another level? >> > > glibc already does this fallback. Note we need to be portable to a lot more than glibc In general this patchset looks really useful, and the performance testing on ceph is really appreciated. thanks! Pádraig
Re: [PATCH 2/3] ls: use statx for loop detection if it's available
On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote: > > On Sep 11, 2019, at 7:51 AM, Jeff Layton wrote: > > > > * move loop detection routine into separate function > > * add a statx-enabled variant that is used when it's available. No need > > for a full stat since all we care about is the dev/ino. > > * Since dev/ino should never change, set AT_STATX_DONT_SYNC > > unconditionally. > > See inline... > > > @@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const > > *realname, bool command_line_arg) > > pending_dirs = new; > > } > > > > -/* Read directory NAME, and list the files in it. > > - If REALNAME is nonzero, print its name instead of NAME; > > - this is used for symbolic links to directories. > > - COMMAND_LINE_ARG means this directory was mentioned on the command > > line. */ > > - > > -static void > > -print_dir (char const *name, char const *realname, bool command_line_arg) > > +#if USE_STATX > > +static bool > > +loop_detected (const char *name, DIR *dirp, bool command_line_arg) > > { > > - DIR *dirp; > > - struct dirent *next; > > - uintmax_t total_blocks = 0; > > - static bool first = true; > > - > > - errno = 0; > > - dirp = opendir (name); > > - if (!dirp) > > + if (LOOP_DETECT) > > { > > - file_failure (command_line_arg, _("cannot open directory %s"), name); > > - return; > > -} > > + struct statx stx; > > + int fd = dirfd (dirp); > > + int flags = AT_STATX_DONT_SYNC; > > + const char *pathname = name; > > + > > + if (0 <= fd) > > + { > > + pathname = ""; > > + flags |= AT_EMPTY_PATH; > > + } > > + else > > +{ > > + /* If dirfd failed, endure the overhead of statx by path. */ > > + fd = AT_FDCWD; > > + } > > + > > + if (statx (fd, pathname, flags, STATX_INO, ) < 0) > > +{ > > + file_failure (command_line_arg, > > +_("cannot determine device and inode of %s"), > > name); > > Should this have a runtime fallback to stat() if statx() is not implemented > on the running kernel, or is that already handled at another level? > glibc already does this fallback. > In that case, rather than splitting loop_detected() into two nearly-identical > functions, it would be possible to do something like the following in a single > loop_detected() function: > > #ifdef USE_STATX > errno = 0; > /* try statx() first since it is lightweight. use stat() if unavailable > */ > if (statx (fd, pathname, flags, STATX_INO, ) == 0 > || (errno && errno != EOPNOTSUPP)) > { > if (errno) > goto error; > ino = stx.stx_ino; > dev = stx.stx_dev; > } > else > #endif > if ((0 <= fd >? fstat (fd, _stat) >: stat (name, _stat) == 0) > { > ino = dir_stat.st_ino; > dev = dir_stat.st_dev; > } > else > { > closedir (dirp); > error: > file_failure (command_line_arg, > _("cannot determine device and inode of %s"), name); > return false; > } > > if (visit_dir (dev, ino)) > { > : > : > > Yes, I think I'm going to rework the set to just add stat_for_ino and fstat_for_ino functions and plug those into the existing code instead of factoring out the loop detection like this. I'll post a v2 set in a bit after I re-do some testing. > > +{ > > + return true; > > +} > > > > + /* If we've already visited this dev/inode pair, warn that > > + we've found a loop, and do not process this directory. */ > > + dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor); > > + if (visit_dir (dev, stx.stx_ino)) > > +{ > > + error (0, 0, _("%s: not listing already-listed directory"), > > + quotef (name)); > > + set_exit_status (true); > > + return true; > > +} > > + > > + dev_ino_push (dev, stx.stx_ino); > > +} > > + return false; > > +} > > +#else > > +static bool > > +loop_detected (const char *name, DIR *dirp, bool command_line_arg) > > +{ > > if (LOOP_DETECT) > > { > > struct stat dir_stat; > > @@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, > > bool command_line_arg) > > { > > file_failure (command_line_arg, > > _("cannot determine device and inode of %s"), name); > > - closedir (dirp); > > - return; > > + return true; > > } > > > > /* If we've already visited this dev/inode pair, warn that > > @@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, > > bool command_line_arg) > > { > > error (0, 0, _("%s: not listing already-listed directory"), > > quotef (name)); > > - closedir (dirp); > >
Re: [PATCH 2/3] ls: use statx for loop detection if it's available
> On Sep 11, 2019, at 7:51 AM, Jeff Layton wrote: > > * move loop detection routine into separate function > * add a statx-enabled variant that is used when it's available. No need > for a full stat since all we care about is the dev/ino. > * Since dev/ino should never change, set AT_STATX_DONT_SYNC > unconditionally. See inline... > @@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const > *realname, bool command_line_arg) > pending_dirs = new; > } > > -/* Read directory NAME, and list the files in it. > - If REALNAME is nonzero, print its name instead of NAME; > - this is used for symbolic links to directories. > - COMMAND_LINE_ARG means this directory was mentioned on the command line. > */ > - > -static void > -print_dir (char const *name, char const *realname, bool command_line_arg) > +#if USE_STATX > +static bool > +loop_detected (const char *name, DIR *dirp, bool command_line_arg) > { > - DIR *dirp; > - struct dirent *next; > - uintmax_t total_blocks = 0; > - static bool first = true; > - > - errno = 0; > - dirp = opendir (name); > - if (!dirp) > + if (LOOP_DETECT) > { > - file_failure (command_line_arg, _("cannot open directory %s"), name); > - return; > -} > + struct statx stx; > + int fd = dirfd (dirp); > + int flags = AT_STATX_DONT_SYNC; > + const char *pathname = name; > + > + if (0 <= fd) > + { > + pathname = ""; > + flags |= AT_EMPTY_PATH; > + } > + else > +{ > + /* If dirfd failed, endure the overhead of statx by path. */ > + fd = AT_FDCWD; > + } > + > + if (statx (fd, pathname, flags, STATX_INO, ) < 0) > +{ > + file_failure (command_line_arg, > +_("cannot determine device and inode of %s"), name); Should this have a runtime fallback to stat() if statx() is not implemented on the running kernel, or is that already handled at another level? In that case, rather than splitting loop_detected() into two nearly-identical functions, it would be possible to do something like the following in a single loop_detected() function: #ifdef USE_STATX errno = 0; /* try statx() first since it is lightweight. use stat() if unavailable */ if (statx (fd, pathname, flags, STATX_INO, ) == 0 || (errno && errno != EOPNOTSUPP)) { if (errno) goto error; ino = stx.stx_ino; dev = stx.stx_dev; } else #endif if ((0 <= fd ? fstat (fd, _stat) : stat (name, _stat) == 0) { ino = dir_stat.st_ino; dev = dir_stat.st_dev; } else { closedir (dirp); error: file_failure (command_line_arg, _("cannot determine device and inode of %s"), name); return false; } if (visit_dir (dev, ino)) { : : > +{ > + return true; > +} > > + /* If we've already visited this dev/inode pair, warn that > + we've found a loop, and do not process this directory. */ > + dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor); > + if (visit_dir (dev, stx.stx_ino)) > +{ > + error (0, 0, _("%s: not listing already-listed directory"), > + quotef (name)); > + set_exit_status (true); > + return true; > +} > + > + dev_ino_push (dev, stx.stx_ino); > +} > + return false; > +} > +#else > +static bool > +loop_detected (const char *name, DIR *dirp, bool command_line_arg) > +{ > if (LOOP_DETECT) > { > struct stat dir_stat; > @@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, bool > command_line_arg) > { > file_failure (command_line_arg, > _("cannot determine device and inode of %s"), name); > - closedir (dirp); > - return; > + return true; > } > > /* If we've already visited this dev/inode pair, warn that > @@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, > bool command_line_arg) > { > error (0, 0, _("%s: not listing already-listed directory"), > quotef (name)); > - closedir (dirp); > set_exit_status (true); > - return; > + return true; > } > > dev_ino_push (dir_stat.st_dev, dir_stat.st_ino); > } > + return false; > +} > +#endif > + > +/* Read directory NAME, and list the files in it. > + If REALNAME is nonzero, print its name instead of NAME; > + this is used for symbolic links to directories. > + COMMAND_LINE_ARG means this directory was mentioned on the command line. > */ > + > +static void > +print_dir (char const *name, char const *realname, bool command_line_arg) > +{ > + DIR *dirp; > + struct dirent *next; > + uintmax_t total_blocks = 0; > + static bool
[PATCH 2/3] ls: use statx for loop detection if it's available
* move loop detection routine into separate function * add a statx-enabled variant that is used when it's available. No need for a full stat since all we care about is the dev/ino. * Since dev/ino should never change, set AT_STATX_DONT_SYNC unconditionally. --- src/ls.c | 106 +++ 1 file changed, 84 insertions(+), 22 deletions(-) diff --git a/src/ls.c b/src/ls.c index 120ce153e340..8e5015e5ac12 100644 --- a/src/ls.c +++ b/src/ls.c @@ -114,6 +114,13 @@ #include "xgethostname.h" #include "c-ctype.h" #include "canonicalize.h" +#include "statx.h" + +#if HAVE_STATX && defined STATX_INO +# define USE_STATX 1 +#else +# define USE_STATX 0 +#endif /* Include last to avoid a clash of include guards with some premature versions of libcap. @@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const *realname, bool command_line_arg) pending_dirs = new; } -/* Read directory NAME, and list the files in it. - If REALNAME is nonzero, print its name instead of NAME; - this is used for symbolic links to directories. - COMMAND_LINE_ARG means this directory was mentioned on the command line. */ - -static void -print_dir (char const *name, char const *realname, bool command_line_arg) +#if USE_STATX +static bool +loop_detected (const char *name, DIR *dirp, bool command_line_arg) { - DIR *dirp; - struct dirent *next; - uintmax_t total_blocks = 0; - static bool first = true; - - errno = 0; - dirp = opendir (name); - if (!dirp) + if (LOOP_DETECT) { - file_failure (command_line_arg, _("cannot open directory %s"), name); - return; -} + struct statx stx; + int fd = dirfd (dirp); + int flags = AT_STATX_DONT_SYNC; + const char *pathname = name; + + if (0 <= fd) + { + pathname = ""; + flags |= AT_EMPTY_PATH; + } + else +{ + /* If dirfd failed, endure the overhead of statx by path. */ + fd = AT_FDCWD; + } + + if (statx (fd, pathname, flags, STATX_INO, ) < 0) +{ + file_failure (command_line_arg, +_("cannot determine device and inode of %s"), name); + return true; +} + /* If we've already visited this dev/inode pair, warn that + we've found a loop, and do not process this directory. */ + dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor); + if (visit_dir (dev, stx.stx_ino)) +{ + error (0, 0, _("%s: not listing already-listed directory"), + quotef (name)); + set_exit_status (true); + return true; +} + + dev_ino_push (dev, stx.stx_ino); +} + return false; +} +#else +static bool +loop_detected (const char *name, DIR *dirp, bool command_line_arg) +{ if (LOOP_DETECT) { struct stat dir_stat; @@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, bool command_line_arg) { file_failure (command_line_arg, _("cannot determine device and inode of %s"), name); - closedir (dirp); - return; + return true; } /* If we've already visited this dev/inode pair, warn that @@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, bool command_line_arg) { error (0, 0, _("%s: not listing already-listed directory"), quotef (name)); - closedir (dirp); set_exit_status (true); - return; + return true; } dev_ino_push (dir_stat.st_dev, dir_stat.st_ino); } + return false; +} +#endif + +/* Read directory NAME, and list the files in it. + If REALNAME is nonzero, print its name instead of NAME; + this is used for symbolic links to directories. + COMMAND_LINE_ARG means this directory was mentioned on the command line. */ + +static void +print_dir (char const *name, char const *realname, bool command_line_arg) +{ + DIR *dirp; + struct dirent *next; + uintmax_t total_blocks = 0; + static bool first = true; + + errno = 0; + dirp = opendir (name); + if (!dirp) +{ + file_failure (command_line_arg, _("cannot open directory %s"), name); + return; +} + + if (loop_detected(name, dirp, command_line_arg)) +{ + closedir (dirp); + return; +} clear_files (); -- 2.21.0