Re: [PATCH 2/3] ls: use statx for loop detection if it's available

2019-09-13 Thread Jeff Layton
On Fri, 2019-09-13 at 17:31 +0100, Pádraig Brady wrote:
> On 13/09/19 11:08, Jeff Layton wrote:
> > On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote:
> > > Should this have a runtime fallback to stat() if statx() is not 
> > > implemented
> > > on the running kernel, or is that already handled at another level?
> > > 
> > 
> > glibc already does this fallback.
> 
> Note we need to be portable to a lot more than glibc
> 

To my knowledge, only glibc has a statx wrapper so far, so others should
just end up using the non-statx-enabled codepaths.

I'd argue that a libc implementation that provided a statx that didn't
fall back when run on an legacy kernel could be considered broken.

> In general this patchset looks really useful,
> and the performance testing on ceph is really appreciated.

Yeah, it's always nice to see when these sorts of patches make a marked
difference. While I didn't measure it, I suspect that the "ls" commands
themselves also ran faster in this test, since they wouldn't have had to
make as many round trips to the MDS.

Note that I expect this set may also help on NFS, and Andreas said this
should help Lustre too. Not sure about other netfs' yet, but I wouldn't
expect this to perform any worse than the stat() variant does anywhere.

Thanks,
-- 
Jeff Layton 




Re: [PATCH 2/3] ls: use statx for loop detection if it's available

2019-09-13 Thread Pádraig Brady
On 13/09/19 11:08, Jeff Layton wrote:
> On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote:

>> Should this have a runtime fallback to stat() if statx() is not implemented
>> on the running kernel, or is that already handled at another level?
>>
> 
> glibc already does this fallback.

Note we need to be portable to a lot more than glibc

In general this patchset looks really useful,
and the performance testing on ceph is really appreciated.

thanks!
Pádraig



Re: [PATCH 2/3] ls: use statx for loop detection if it's available

2019-09-13 Thread Jeff Layton
On Thu, 2019-09-12 at 11:58 -0600, Andreas Dilger wrote:
> > On Sep 11, 2019, at 7:51 AM, Jeff Layton  wrote:
> > 
> > * move loop detection routine into separate function
> > * add a statx-enabled variant that is used when it's available. No need
> >  for a full stat since all we care about is the dev/ino.
> > * Since dev/ino should never change, set AT_STATX_DONT_SYNC
> >  unconditionally.
> 
> See inline...
> 
> > @@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const 
> > *realname, bool command_line_arg)
> >   pending_dirs = new;
> > }
> > 
> > -/* Read directory NAME, and list the files in it.
> > -   If REALNAME is nonzero, print its name instead of NAME;
> > -   this is used for symbolic links to directories.
> > -   COMMAND_LINE_ARG means this directory was mentioned on the command 
> > line.  */
> > -
> > -static void
> > -print_dir (char const *name, char const *realname, bool command_line_arg)
> > +#if USE_STATX
> > +static bool
> > +loop_detected (const char *name, DIR *dirp, bool command_line_arg)
> > {
> > -  DIR *dirp;
> > -  struct dirent *next;
> > -  uintmax_t total_blocks = 0;
> > -  static bool first = true;
> > -
> > -  errno = 0;
> > -  dirp = opendir (name);
> > -  if (!dirp)
> > +  if (LOOP_DETECT)
> > {
> > -  file_failure (command_line_arg, _("cannot open directory %s"), name);
> > -  return;
> > -}
> > +  struct statx stx;
> > +  int fd = dirfd (dirp);
> > +  int flags = AT_STATX_DONT_SYNC;
> > +  const char *pathname = name;
> > +
> > +  if (0 <= fd)
> > +   {
> > + pathname = "";
> > + flags |= AT_EMPTY_PATH;
> > +   }
> > +  else
> > +{
> > + /* If dirfd failed, endure the overhead of statx by path. */
> > + fd = AT_FDCWD;
> > +   }
> > +
> > +  if (statx (fd, pathname, flags, STATX_INO, ) < 0)
> > +{
> > +  file_failure (command_line_arg,
> > +_("cannot determine device and inode of %s"), 
> > name);
> 
> Should this have a runtime fallback to stat() if statx() is not implemented
> on the running kernel, or is that already handled at another level?
> 

glibc already does this fallback.

> In that case, rather than splitting loop_detected() into two nearly-identical
> functions, it would be possible to do something like the following in a single
> loop_detected() function:
> 
> #ifdef USE_STATX
>   errno = 0;
>   /* try statx() first since it is lightweight. use stat() if unavailable 
> */
>   if (statx (fd, pathname, flags, STATX_INO, ) == 0
>   || (errno && errno != EOPNOTSUPP))
> {
>   if (errno)
> goto error;
>   ino = stx.stx_ino;
>   dev = stx.stx_dev;
> }
>   else
> #endif
>   if ((0 <= fd
>? fstat (fd, _stat)
>: stat (name, _stat) == 0)
> {
>   ino = dir_stat.st_ino;
>   dev = dir_stat.st_dev;
> }
>   else
> {
>   closedir (dirp);
> error:
>   file_failure (command_line_arg,
> _("cannot determine device and inode of %s"), name);
>   return false;
> }
> 
>   if (visit_dir (dev, ino))
> {
>   :
>   :
> 
> 

Yes, I think I'm going to rework the set to just add stat_for_ino and
fstat_for_ino functions and plug those into the existing code instead of
factoring out the loop detection like this.

I'll post a v2 set in a bit after I re-do some testing.

> > +{
> > +  return true;
> > +}
> > 
> > +  /* If we've already visited this dev/inode pair, warn that
> > + we've found a loop, and do not process this directory.  */
> > +  dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor);
> > +  if (visit_dir (dev, stx.stx_ino))
> > +{
> > +  error (0, 0, _("%s: not listing already-listed directory"),
> > + quotef (name));
> > +  set_exit_status (true);
> > +  return true;
> > +}
> > +
> > +  dev_ino_push (dev, stx.stx_ino);
> > +}
> > +  return false;
> > +}
> > +#else
> > +static bool
> > +loop_detected (const char *name, DIR *dirp, bool command_line_arg)
> > +{
> >   if (LOOP_DETECT)
> > {
> >   struct stat dir_stat;
> > @@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, 
> > bool command_line_arg)
> > {
> >   file_failure (command_line_arg,
> > _("cannot determine device and inode of %s"), name);
> > -  closedir (dirp);
> > -  return;
> > +  return true;
> > }
> > 
> >   /* If we've already visited this dev/inode pair, warn that
> > @@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, 
> > bool command_line_arg)
> > {
> >   error (0, 0, _("%s: not listing already-listed directory"),
> >  quotef (name));
> > -  closedir (dirp);
> >   

Re: [PATCH 2/3] ls: use statx for loop detection if it's available

2019-09-12 Thread Andreas Dilger

> On Sep 11, 2019, at 7:51 AM, Jeff Layton  wrote:
> 
> * move loop detection routine into separate function
> * add a statx-enabled variant that is used when it's available. No need
>  for a full stat since all we care about is the dev/ino.
> * Since dev/ino should never change, set AT_STATX_DONT_SYNC
>  unconditionally.

See inline...

> @@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const 
> *realname, bool command_line_arg)
>   pending_dirs = new;
> }
> 
> -/* Read directory NAME, and list the files in it.
> -   If REALNAME is nonzero, print its name instead of NAME;
> -   this is used for symbolic links to directories.
> -   COMMAND_LINE_ARG means this directory was mentioned on the command line.  
> */
> -
> -static void
> -print_dir (char const *name, char const *realname, bool command_line_arg)
> +#if USE_STATX
> +static bool
> +loop_detected (const char *name, DIR *dirp, bool command_line_arg)
> {
> -  DIR *dirp;
> -  struct dirent *next;
> -  uintmax_t total_blocks = 0;
> -  static bool first = true;
> -
> -  errno = 0;
> -  dirp = opendir (name);
> -  if (!dirp)
> +  if (LOOP_DETECT)
> {
> -  file_failure (command_line_arg, _("cannot open directory %s"), name);
> -  return;
> -}
> +  struct statx stx;
> +  int fd = dirfd (dirp);
> +  int flags = AT_STATX_DONT_SYNC;
> +  const char *pathname = name;
> +
> +  if (0 <= fd)
> + {
> +   pathname = "";
> +   flags |= AT_EMPTY_PATH;
> + }
> +  else
> +{
> +   /* If dirfd failed, endure the overhead of statx by path. */
> +   fd = AT_FDCWD;
> + }
> +
> +  if (statx (fd, pathname, flags, STATX_INO, ) < 0)
> +{
> +  file_failure (command_line_arg,
> +_("cannot determine device and inode of %s"), name);

Should this have a runtime fallback to stat() if statx() is not implemented
on the running kernel, or is that already handled at another level?

In that case, rather than splitting loop_detected() into two nearly-identical
functions, it would be possible to do something like the following in a single
loop_detected() function:

#ifdef USE_STATX
  errno = 0;
  /* try statx() first since it is lightweight. use stat() if unavailable */
  if (statx (fd, pathname, flags, STATX_INO, ) == 0
  || (errno && errno != EOPNOTSUPP))
{
  if (errno)
goto error;
  ino = stx.stx_ino;
  dev = stx.stx_dev;
}
  else
#endif
  if ((0 <= fd
   ? fstat (fd, _stat)
   : stat (name, _stat) == 0)
{
  ino = dir_stat.st_ino;
  dev = dir_stat.st_dev;
}
  else
{
  closedir (dirp);
error:
  file_failure (command_line_arg,
_("cannot determine device and inode of %s"), name);
  return false;
}

  if (visit_dir (dev, ino))
{
  :
  :


> +{
> +  return true;
> +}
> 
> +  /* If we've already visited this dev/inode pair, warn that
> + we've found a loop, and do not process this directory.  */
> +  dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor);
> +  if (visit_dir (dev, stx.stx_ino))
> +{
> +  error (0, 0, _("%s: not listing already-listed directory"),
> + quotef (name));
> +  set_exit_status (true);
> +  return true;
> +}
> +
> +  dev_ino_push (dev, stx.stx_ino);
> +}
> +  return false;
> +}
> +#else
> +static bool
> +loop_detected (const char *name, DIR *dirp, bool command_line_arg)
> +{
>   if (LOOP_DETECT)
> {
>   struct stat dir_stat;
> @@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, bool 
> command_line_arg)
> {
>   file_failure (command_line_arg,
> _("cannot determine device and inode of %s"), name);
> -  closedir (dirp);
> -  return;
> +  return true;
> }
> 
>   /* If we've already visited this dev/inode pair, warn that
> @@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, 
> bool command_line_arg)
> {
>   error (0, 0, _("%s: not listing already-listed directory"),
>  quotef (name));
> -  closedir (dirp);
>   set_exit_status (true);
> -  return;
> +  return true;
> }
> 
>   dev_ino_push (dir_stat.st_dev, dir_stat.st_ino);
> }
> +  return false;
> +}
> +#endif
> +
> +/* Read directory NAME, and list the files in it.
> +   If REALNAME is nonzero, print its name instead of NAME;
> +   this is used for symbolic links to directories.
> +   COMMAND_LINE_ARG means this directory was mentioned on the command line.  
> */
> +
> +static void
> +print_dir (char const *name, char const *realname, bool command_line_arg)
> +{
> +  DIR *dirp;
> +  struct dirent *next;
> +  uintmax_t total_blocks = 0;
> +  static bool 

[PATCH 2/3] ls: use statx for loop detection if it's available

2019-09-11 Thread Jeff Layton
* move loop detection routine into separate function
* add a statx-enabled variant that is used when it's available. No need
  for a full stat since all we care about is the dev/ino.
* Since dev/ino should never change, set AT_STATX_DONT_SYNC
  unconditionally.
---
 src/ls.c | 106 +++
 1 file changed, 84 insertions(+), 22 deletions(-)

diff --git a/src/ls.c b/src/ls.c
index 120ce153e340..8e5015e5ac12 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -114,6 +114,13 @@
 #include "xgethostname.h"
 #include "c-ctype.h"
 #include "canonicalize.h"
+#include "statx.h"
+
+#if HAVE_STATX && defined STATX_INO
+# define USE_STATX 1
+#else
+# define USE_STATX 0
+#endif
 
 /* Include  last to avoid a clash of 
include guards with some premature versions of libcap.
@@ -2711,27 +2718,54 @@ queue_directory (char const *name, char const 
*realname, bool command_line_arg)
   pending_dirs = new;
 }
 
-/* Read directory NAME, and list the files in it.
-   If REALNAME is nonzero, print its name instead of NAME;
-   this is used for symbolic links to directories.
-   COMMAND_LINE_ARG means this directory was mentioned on the command line.  */
-
-static void
-print_dir (char const *name, char const *realname, bool command_line_arg)
+#if USE_STATX
+static bool
+loop_detected (const char *name, DIR *dirp, bool command_line_arg)
 {
-  DIR *dirp;
-  struct dirent *next;
-  uintmax_t total_blocks = 0;
-  static bool first = true;
-
-  errno = 0;
-  dirp = opendir (name);
-  if (!dirp)
+  if (LOOP_DETECT)
 {
-  file_failure (command_line_arg, _("cannot open directory %s"), name);
-  return;
-}
+  struct statx stx;
+  int fd = dirfd (dirp);
+  int flags = AT_STATX_DONT_SYNC;
+  const char *pathname = name;
+
+  if (0 <= fd)
+   {
+ pathname = "";
+ flags |= AT_EMPTY_PATH;
+   }
+  else
+{
+ /* If dirfd failed, endure the overhead of statx by path. */
+ fd = AT_FDCWD;
+   }
+
+  if (statx (fd, pathname, flags, STATX_INO, ) < 0)
+{
+  file_failure (command_line_arg,
+_("cannot determine device and inode of %s"), name);
+  return true;
+}
 
+  /* If we've already visited this dev/inode pair, warn that
+ we've found a loop, and do not process this directory.  */
+  dev_t dev = makedev (stx.stx_dev_major, stx.stx_dev_minor);
+  if (visit_dir (dev, stx.stx_ino))
+{
+  error (0, 0, _("%s: not listing already-listed directory"),
+ quotef (name));
+  set_exit_status (true);
+  return true;
+}
+
+  dev_ino_push (dev, stx.stx_ino);
+}
+  return false;
+}
+#else
+static bool
+loop_detected (const char *name, DIR *dirp, bool command_line_arg)
+{
   if (LOOP_DETECT)
 {
   struct stat dir_stat;
@@ -2744,8 +2778,7 @@ print_dir (char const *name, char const *realname, bool 
command_line_arg)
 {
   file_failure (command_line_arg,
 _("cannot determine device and inode of %s"), name);
-  closedir (dirp);
-  return;
+  return true;
 }
 
   /* If we've already visited this dev/inode pair, warn that
@@ -2754,13 +2787,42 @@ print_dir (char const *name, char const *realname, bool 
command_line_arg)
 {
   error (0, 0, _("%s: not listing already-listed directory"),
  quotef (name));
-  closedir (dirp);
   set_exit_status (true);
-  return;
+  return true;
 }
 
   dev_ino_push (dir_stat.st_dev, dir_stat.st_ino);
 }
+  return false;
+}
+#endif
+
+/* Read directory NAME, and list the files in it.
+   If REALNAME is nonzero, print its name instead of NAME;
+   this is used for symbolic links to directories.
+   COMMAND_LINE_ARG means this directory was mentioned on the command line.  */
+
+static void
+print_dir (char const *name, char const *realname, bool command_line_arg)
+{
+  DIR *dirp;
+  struct dirent *next;
+  uintmax_t total_blocks = 0;
+  static bool first = true;
+
+  errno = 0;
+  dirp = opendir (name);
+  if (!dirp)
+{
+  file_failure (command_line_arg, _("cannot open directory %s"), name);
+  return;
+}
+
+  if (loop_detected(name, dirp, command_line_arg))
+{
+  closedir (dirp);
+  return;
+}
 
   clear_files ();
 
-- 
2.21.0