On 09/04/2021 23:51, Pádraig Brady wrote:
On 09/04/2021 13:02, Carl Edquist wrote:
Dear Coreutils Maintainers,

I'd like to introduce my favorite 'ls' option, '-W', which I have been
enjoying using regularly over the last few years.

The concept is just to sort filenames by their printed widths.


(If this sounds odd, I invite you hear it out, try and see for yourself!)


I am including a patch with my implementation and accompanying tests - as
well as some sample output.  And I'll happily field any requests for
improvements.

I quite like this. It seems useful.
Also doing outside of ls is quite awkward,
especially considering multi column output.

I would avoid the -W short option, as that would clash with ls from FreeBSD for 
example.
It's probably best to not provide a short option for this at all.

Playing around with this a bit more,
I really like how much more concise it makes output in general.

I've attached two patches which I'll apply tomorrow at some stage.

The first adjusts your patch to:
  Remove -W short option.
  Fix crash on systems with statx().
  s/filename/file name/.
  Expand in texinfo that --sort=width is most useful with -C (the default)

The second is a performance improvement,
as we're now calling quote_name_width a lot more.

thanks!
Pádraig
>From 7ebe34528c241903b3253e8366fe596db0869d21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Sat, 10 Apr 2021 16:54:03 +0100
Subject: [PATCH 2/2] ls: cache name width determination

This is especially important now for --sort=width,
as that can greatly increase how often this
expensive quote_name_width() function is called per file.

This also helps the default invocation of ls,
or specifically the --format={across,vertical} cases
(when --width is not set to 0),
to avoid two calls to this function per file.

Note the only case where we later compute the width,
is for --format=commas.  That's only done once though,
so we leave the computation close to use to
maximize hardware caching.

* src/ls.c (struct fileinfo): Add a WIDTH member to cache
the screen width of the file name.
(update_current_files_cache): Set the WIDTH members for cases
they're needed multiple times.  Note we do this explicitly here,
rather than caching at use, so that the fileinfo
structures can remain const in the sorting and presentation functions.
(sort_files): Call the new update_current_files_cache() in this
initialization function.
(fileinfo_name_width): Renamed from fileinfo_width,
and adjusted to return the cached value if available.
---
 src/ls.c | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/ls.c b/src/ls.c
index b9fde25d5..b4edf620f 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -235,6 +235,9 @@ struct fileinfo
 
     /* Whether file name needs quoting. tri-state with -1 == unknown.  */
     int quoted;
+
+    /* Cached screen width (including quoting).  */
+    size_t width;
   };
 
 #define LEN_STR_PAIR(s) sizeof (s) - 1, s
@@ -3883,17 +3886,22 @@ cmp_extension (struct fileinfo const *a, struct fileinfo const *b,
   return diff ? diff : cmp (a->name, b->name);
 }
 
+/* Return the (cached) screen width,
+   for the NAME associated with the passed fileinfo F.  */
+
 static inline size_t
-fileinfo_width (struct fileinfo const *f)
+fileinfo_name_width (struct fileinfo const *f)
 {
-  return quote_name_width (f->name, filename_quoting_options, f->quoted);
+  return f->width
+         ? f->width
+         : quote_name_width (f->name, filename_quoting_options, f->quoted);
 }
 
 static inline int
 cmp_width (struct fileinfo const *a, struct fileinfo const *b,
           int (*cmp) (char const *, char const *))
 {
-  int diff = fileinfo_width (a) - fileinfo_width (b);
+  int diff = fileinfo_name_width (a) - fileinfo_name_width (b);
   return diff ? diff : cmp (a->name, b->name);
 }
 
@@ -4003,6 +4011,24 @@ initialize_ordering_vector (void)
     sorted_file[i] = &cwd_file[i];
 }
 
+/* Cache values based on attributes global to all files.  */
+
+static void
+update_current_files_cache (void)
+{
+  size_t i;
+
+  for (i = 0; i < cwd_n_used; i++)
+    {
+      struct fileinfo *f = sorted_file[i];
+
+      /* Cache screen width of name, if needed multiple times.  */
+      if (sort_type == sort_width
+          || (line_length && (format == many_per_line || format == horizontal)))
+        f->width = fileinfo_name_width (f);
+    }
+}
+
 /* Sort the files now in the table.  */
 
 static void
@@ -4019,6 +4045,8 @@ sort_files (void)
 
   initialize_ordering_vector ();
 
+  update_current_files_cache ();
+
   if (sort_type == sort_none)
     return;
 
@@ -5055,7 +5083,7 @@ length_of_file_name_and_frills (const struct fileinfo *f)
   if (print_scontext)
     len += 1 + (format == with_commas ? strlen (f->scontext) : scontext_width);
 
-  len += quote_name_width (f->name, filename_quoting_options, f->quoted);
+  len += fileinfo_name_width (f);
 
   if (indicator_style != none)
     {
-- 
2.26.2

>From dbb7874fccfa262457e2017dceff6039236ca401 Mon Sep 17 00:00:00 2001
From: Carl Edquist <edqu...@cs.wisc.edu>
Date: Fri, 26 Mar 2021 04:27:54 -0500
Subject: [PATCH 1/2] ls: add --sort=width option to sort by file name width

This helps identify the outliers for long filenames, and also produces
a more compact display of columns when listing a directory with many
entries of various widths.

* src/ls.c (sort_type, sort_types, sort_width): New sort_width sort
type.
(sort_args): Add "width" sort arg.
(cmp_width, fileinfo_width): New sort function and helper for file name
width.
(quote_name_width): Add function prototype declaration.
(usage): Document --sort=width option.
* doc/coreutils.texi: Document --sort=width option.
* tests/ls/sort-width-option.sh: New test for --sort=width option.
* tests/local.mk: Reference new test.
* NEWS: Mention the new feature.
---
 NEWS                          |  2 ++
 doc/coreutils.texi            |  7 ++++++
 src/ls.c                      | 28 +++++++++++++++++++++---
 tests/local.mk                |  1 +
 tests/ls/sort-width-option.sh | 41 +++++++++++++++++++++++++++++++++++
 5 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100755 tests/ls/sort-width-option.sh

diff --git a/NEWS b/NEWS
index 802f4b427..dbe34fc38 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   ls --classify now supports the "always", "auto", or "never" flags,
   to support only outputting classifier characters if connected to a tty.
 
+  ls now accepts the --sort=width option, to sort by file name width.
+
   nl --line-increment can now take a negative number to decrement the count.
 
 ** Improvements
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 06ecdd74c..e53c0de6e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -7939,6 +7939,13 @@ Sort by version name and number, lowest first.  It behaves like a default
 sort, except that each sequence of decimal digits is treated numerically
 as an index/version number.  (@xref{Version sort ordering}.)
 
+@item --sort=width
+@opindex --sort
+@opindex width@r{, sorting option for @command{ls}}
+Sort by printed width of file names.
+This is especially useful with the default @option{--format=vertical}
+output format, to most densely display the listed files.
+
 @item -X
 @itemx --sort=extension
 @opindex -X
diff --git a/src/ls.c b/src/ls.c
index 2d0450e54..b9fde25d5 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -307,6 +307,10 @@ static void parse_ls_color (void);
 
 static void getenv_quoting_style (void);
 
+static size_t quote_name_width (const char *name,
+                                struct quoting_options const *options,
+                                int needs_general_quoting);
+
 /* Initial size of hash table.
    Most hierarchies are likely to be shallower than this.  */
 #define INITIAL_TABLE_SIZE 30
@@ -475,6 +479,7 @@ enum sort_type
     sort_none = -1,		/* -U */
     sort_name,			/* default */
     sort_extension,		/* -X */
+    sort_width,
     sort_size,			/* -S */
     sort_version,		/* -v */
     sort_time,			/* -t */
@@ -903,11 +908,11 @@ ARGMATCH_VERIFY (format_args, format_types);
 
 static char const *const sort_args[] =
 {
-  "none", "time", "size", "extension", "version", NULL
+  "none", "time", "size", "extension", "version", "width", NULL
 };
 static enum sort_type const sort_types[] =
 {
-  sort_none, sort_time, sort_size, sort_extension, sort_version
+  sort_none, sort_time, sort_size, sort_extension, sort_version, sort_width
 };
 ARGMATCH_VERIFY (sort_args, sort_types);
 
@@ -1134,6 +1139,7 @@ calc_req_mask (void)
     case sort_name:
     case sort_version:
     case sort_extension:
+    case sort_width:
       break;
     case sort_time:
       mask |= time_type_to_statx ();
@@ -3877,6 +3883,20 @@ cmp_extension (struct fileinfo const *a, struct fileinfo const *b,
   return diff ? diff : cmp (a->name, b->name);
 }
 
+static inline size_t
+fileinfo_width (struct fileinfo const *f)
+{
+  return quote_name_width (f->name, filename_quoting_options, f->quoted);
+}
+
+static inline int
+cmp_width (struct fileinfo const *a, struct fileinfo const *b,
+          int (*cmp) (char const *, char const *))
+{
+  int diff = fileinfo_width (a) - fileinfo_width (b);
+  return diff ? diff : cmp (a->name, b->name);
+}
+
 DEFINE_SORT_FUNCTIONS (ctime, cmp_ctime)
 DEFINE_SORT_FUNCTIONS (mtime, cmp_mtime)
 DEFINE_SORT_FUNCTIONS (atime, cmp_atime)
@@ -3884,6 +3904,7 @@ DEFINE_SORT_FUNCTIONS (btime, cmp_btime)
 DEFINE_SORT_FUNCTIONS (size, cmp_size)
 DEFINE_SORT_FUNCTIONS (name, cmp_name)
 DEFINE_SORT_FUNCTIONS (extension, cmp_extension)
+DEFINE_SORT_FUNCTIONS (width, cmp_width)
 
 /* Compare file versions.
    Unlike all other compare functions above, cmp_version depends only
@@ -3936,6 +3957,7 @@ static qsortFunc const sort_functions[][2][2][2] =
   {
     LIST_SORTFUNCTION_VARIANTS (name),
     LIST_SORTFUNCTION_VARIANTS (extension),
+    LIST_SORTFUNCTION_VARIANTS (width),
     LIST_SORTFUNCTION_VARIANTS (size),
 
     {
@@ -5454,7 +5476,7 @@ Sort entries alphabetically if none of -cftuvSUX nor --sort is specified.\n\
   -S                         sort by file size, largest first\n\
       --sort=WORD            sort by WORD instead of name: none (-U), size (-S)\
 ,\n\
-                               time (-t), version (-v), extension (-X)\n\
+                               time (-t), version (-v), extension (-X), width\n\
       --time=WORD            change the default of using modification times;\n\
                                access time (-u): atime, access, use;\n\
                                change time (-c): ctime, status;\n\
diff --git a/tests/local.mk b/tests/local.mk
index 27e31ec8e..a44feca73 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -632,6 +632,7 @@ all_tests =					\
   tests/ls/symlink-quote.sh			\
   tests/ls/symlink-slash.sh			\
   tests/ls/time-style-diag.sh			\
+  tests/ls/sort-width-option.sh			\
   tests/ls/x-option.sh				\
   tests/ls/hyperlink.sh				\
   tests/mkdir/p-1.sh				\
diff --git a/tests/ls/sort-width-option.sh b/tests/ls/sort-width-option.sh
new file mode 100755
index 000000000..ab7cb6ad9
--- /dev/null
+++ b/tests/ls/sort-width-option.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# Exercise the --sort=width option.
+
+# Copyright (C) 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ ls
+
+mkdir subdir       || framework_failure_
+touch subdir/aaaaa || framework_failure_
+touch subdir/bbb   || framework_failure_
+touch subdir/cccc  || framework_failure_
+touch subdir/d     || framework_failure_
+touch subdir/zz    || framework_failure_
+
+
+ls --sort=width subdir > out || fail=1
+cat <<\EOF > exp || framework_failure_
+d
+zz
+bbb
+cccc
+aaaaa
+EOF
+
+compare exp out || fail=1
+
+Exit $fail
-- 
2.26.2

Reply via email to