On 10/05/2022 15:21, Pádraig Brady wrote:
On 10/05/2022 09:45, Mikael Magnusson wrote:
Hi,

In bug #9086 [1] there was a lot of good discussion about case
insensitive matching for dircolors. This was all then summarily
ignored in [2] which made the behaviour unconditional. This breaks
coloring of uppercase files that should have a different color from
lowercase files, for example .C which is a c++ source file, not a c
source file. And as mentioned in [1] it is also very useful to notice
incorrectly (eg uppercase extension files) quickly by them being
white, so you can fix them.

I'm not sure if the suggestion in this bug (33123) is good though;
since ls --color=auto-case will just error on older releases, you
can't easily sync your aliases. Unfortunately ls also ignores all of
LS_COLORS if it contains any unknown values (why??) so we can't add a
"CS" field there for case sensitiveness either (unless there's some
extra options field already I don't know about?).

If anyone has some ideas I would be happy to produce a patch (assuming
you don't need copyright assignment).

[1] https://debbugs.gnu.org/cgi/bugreport.cgi?bug=9086
[2] https://lists.gnu.org/archive/html/coreutils/2018-06/msg00011.html

Thanks for taking the time to tie all these threads together.

Yes we forgot about [1] when implementing [2].

The ideas in [1] are sound. I.e. we shouldn't need a new option.
We just need to honor any capitalized extension entries,
and fall back to case insensitive for lower case extensions.
We do need to be cognizant of performance though.
We could sort the list of extensions, which would auto give
precedence to capitalized entries, and we might also use that
sorting to break out of the match loop early.

The attached will honor different sequences defined for
separate extension letter cases, by operating case sensitively
for those extensions.

cheers,
Pádraig
From 2c1daa5dbe226852417b9721f799e0939d640ea0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Sun, 4 Sep 2022 19:59:25 +0100
Subject: [PATCH] ls: --color: honor separate sequences for extension cases

Following on from commit v8.29-45-g24053fbd8 which unconditionally
used case insensitive extension matching, support selective
case sensitive matching when there are separate extension cases
defined with different display sequences.

* src/ls.c (parse_ls_color): Postprocess the list to
mark entries for case sensitive matching, or to
quickly ignore unmatchable entries.
* tests/ls/color-ext.sh: Add test cases.
* NEWS: Mention the change in behavior.
Adressess https://bugs.gnu.org/33123
---
 NEWS                  |  3 ++
 src/ls.c              | 66 +++++++++++++++++++++++++++++++++++++++----
 tests/ls/color-ext.sh | 46 +++++++++++++++++++++++++++++-
 3 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/NEWS b/NEWS
index db5150824..496d7a4da 100644
--- a/NEWS
+++ b/NEWS
@@ -40,6 +40,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   reverting to the behavior in coreutils-9.0 and earlier.
   This behavior is now documented.
 
+  ls --color now matches a file extension case sensitively
+  if there are different sequences defined for separate cases.
+
   runcon now exits with status 125 for internal errors.  Previously upon
   internal errors it would exit with status 1, which was less distinguishable
   from errors from the invoked command.
diff --git a/src/ls.c b/src/ls.c
index 2960f6d38..5a83415c6 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -612,6 +612,7 @@ struct color_ext_type
   {
     struct bin_str ext;		/* The extension we're looking for */
     struct bin_str seq;		/* The sequence to output when we do */
+    bool   exact_match;		/* Whether to compare case insensitively */
     struct color_ext_type *next;	/* Next in list */
   };
 
@@ -643,7 +644,7 @@ static struct bin_str color_indicator[] =
     { LEN_STR_PAIR ("\033[K") },	/* cl: clear to end of line */
   };
 
-/* FIXME: comment  */
+/* A list mapping file extensions to corresponding display sequence.  */
 static struct color_ext_type *color_ext_list = NULL;
 
 /* Buffer for color sequences */
@@ -2775,6 +2776,7 @@ parse_ls_color (void)
               ext = xmalloc (sizeof *ext);
               ext->next = color_ext_list;
               color_ext_list = ext;
+              ext->exact_match = false;
 
               ++p;
               ext->ext.string = buf;
@@ -2860,6 +2862,49 @@ parse_ls_color (void)
         }
       print_with_color = false;
     }
+  else
+    {
+      /* Postprocess list to set EXACT_MATCH on entries where there are
+         different cased extensions with separate sequences defined.
+         Also set ext.len to SIZE_MAX on any entries that can't
+         match due to precedence, to avoid redundant string compares.  */
+      struct color_ext_type *e1;
+
+      for (e1 = color_ext_list; e1 != NULL; e1 = e1->next)
+        {
+          struct color_ext_type *e2;
+          bool case_ignored = false;
+
+          for (e2 = e1->next; e2 != NULL; e2 = e2->next)
+            {
+              if (e2->ext.len < SIZE_MAX && e1->ext.len == e2->ext.len)
+                {
+                  if (memcmp (e1->ext.string, e2->ext.string, e1->ext.len) == 0)
+                    e2->ext.len = SIZE_MAX; /* Ignore */
+                  else if (c_strncasecmp (e1->ext.string, e2->ext.string,
+                                          e1->ext.len) == 0)
+                    {
+                      if (case_ignored)
+                        {
+                          e2->ext.len = SIZE_MAX; /* Ignore */
+                        }
+                      else if (e1->seq.len == e2->seq.len
+                               && memcmp (e1->seq.string, e2->seq.string,
+                                          e1->seq.len) == 0)
+                        {
+                          e2->ext.len = SIZE_MAX; /* Ignore */
+                          case_ignored = true;    /* Ignore all subsequent */
+                        }
+                      else
+                        {
+                          e1->exact_match = true;
+                          e2->exact_match = true;
+                        }
+                    }
+                }
+            }
+        }
+    }
 
   if (color_indicator[C_LINK].len == 6
       && !STRNCMP_LIT (color_indicator[C_LINK].string, "target"))
@@ -5040,10 +5085,21 @@ get_color_indicator (const struct fileinfo *f, bool symlink_target)
       name += len;		/* Pointer to final \0.  */
       for (ext = color_ext_list; ext != NULL; ext = ext->next)
         {
-          if (ext->ext.len <= len
-              && c_strncasecmp (name - ext->ext.len, ext->ext.string,
-                                ext->ext.len) == 0)
-            break;
+          if (ext->ext.len <= len)
+            {
+              if (ext->exact_match)
+                {
+                  if (STREQ_LEN (name - ext->ext.len, ext->ext.string,
+                                 ext->ext.len))
+                    break;
+                }
+              else
+                {
+                  if (c_strncasecmp (name - ext->ext.len, ext->ext.string,
+                                     ext->ext.len) == 0)
+                    break;
+                }
+            }
         }
     }
 
diff --git a/tests/ls/color-ext.sh b/tests/ls/color-ext.sh
index c9d3c0afb..64193a1fd 100755
--- a/tests/ls/color-ext.sh
+++ b/tests/ls/color-ext.sh
@@ -20,13 +20,16 @@
 print_ver_ ls
 working_umask_or_skip_
 
-touch img1.jpg IMG2.JPG file1.z file2.Z || framework_failure_
+touch img1.jpg IMG2.JPG img3.JpG file1.z file2.Z || framework_failure_
 code_jpg='01;35'
+code_JPG='01;35;46'
 code_z='01;31'
 c0=$(printf '\033[0m')
 c_jpg=$(printf '\033[%sm' $code_jpg)
+c_JPG=$(printf '\033[%sm' $code_JPG)
 c_z=$(printf '\033[%sm' $code_z)
 
+# Case insenitive extensions
 LS_COLORS="*.jpg=$code_jpg:*.Z=$code_z" ls -U1 --color=always \
   img1.jpg IMG2.JPG file1.z file2.Z > out || fail=1
 printf "$c0\
@@ -37,5 +40,46 @@ ${c_z}file2.Z$c0
 " > out_ok || framework_failure_
 compare out out_ok || fail=1
 
+# Case sensitive extensions
+LS_COLORS="*.jpg=$code_jpg:*.JPG=$code_JPG" ls -U1 --color=always \
+  img1.jpg IMG2.JPG img3.JpG > out || fail=1
+printf "$c0\
+${c_jpg}img1.jpg$c0
+${c_JPG}IMG2.JPG$c0
+img3.JpG
+" > out_ok || framework_failure_
+compare out out_ok || fail=1
+
+# Case insensitive extensions (due to same sequences)
+LS_COLORS="*.jpg=$code_jpg:*.JPG=$code_jpg" ls -U1 --color=always \
+  img1.jpg IMG2.JPG img3.JpG > out || fail=1
+printf "$c0\
+${c_jpg}img1.jpg$c0
+${c_jpg}IMG2.JPG$c0
+${c_jpg}img3.JpG$c0
+" > out_ok || framework_failure_
+compare out out_ok || fail=1
+
+# Case insensitive extensions (due to same sequences (after ignored sequences))
+# Note later entries in LS_COLORS take precedence.
+LS_COLORS="*.jpg=$code_jpg:*.jpg=$code_JPG:*.JPG=$code_JPG" \
+  ls -U1 --color=always img1.jpg IMG2.JPG img3.JpG > out || fail=1
+printf "$c0\
+${c_JPG}img1.jpg$c0
+${c_JPG}IMG2.JPG$c0
+${c_JPG}img3.JpG$c0
+" > out_ok || framework_failure_
+compare out out_ok || fail=1
+
+# Case sensitive extensions (due to diff sequences (after ignored sequences))
+# Note later entries in LS_COLORS take precedence.
+LS_COLORS="*.jpg=$code_JPG:*.jpg=$code_jpg:*.JPG=$code_JPG" \
+  ls -U1 --color=always img1.jpg IMG2.JPG img3.JpG > out || fail=1
+printf "$c0\
+${c_jpg}img1.jpg$c0
+${c_JPG}IMG2.JPG$c0
+img3.JpG
+" > out_ok || framework_failure_
+compare out out_ok || fail=1
 
 Exit $fail
-- 
2.26.2

Reply via email to