On Tue, Jan 04, 2011 at 09:14:51PM +0300, Alexander Polakov wrote:
> Hi,
> 
> I wonder if there any plans on adding multibyte support for ls(1)?
> Or maybe there's a reason why it's not a great idea (which I am not
> aware of)?
> Anyway, here's a patch I have. It's based on DragonFlyBSD's ls.
> 

Any locale stuff added to applications that are used on the ramdisk
(bsd.rd) must be inside #ifndef SMALL.
The ls binary is linked statically so we need to prevent it from wasting
space by pulling citrus stuff onto the ramdisk.

More importantly, there is an alleged bug in our wcwidth() implementation.
I haven't had time to investigate, but it has been pointed out on separate
occasions, by Jordi Beltran Creix and by n...@.
Test program (from Jordi):

  #include <stdio.h>
  #include <locale.h>
  
  main ()
  {
        setlocale(LC_ALL, "");
        printf("%d %d %d %d\n", wcwidth(0x53DA), wcwidth('A'),
  wcwidth(0x200B), wcwidth(0x1F));
        return 0;
  }
  
Output is 2, 1, 1, 0, should be 2, 1, 0, -1 (according to Jordi).

We should make sure that wcwidth() is working properly before changing
applications to use it. We also need a wcwidth() man page.

FWIW, below is a diff that Jordi sent me some time ago to fix ls(1).
It also depends on wcwidth().

Index: ls.c
===================================================================
RCS file: /cvs/src/bin/ls/ls.c,v
retrieving revision 1.35
diff -u -p -r1.35 ls.c
--- ls.c        27 Oct 2009 23:59:21 -0000      1.35
+++ ls.c        7 Aug 2010 09:16:03 -0000
@@ -48,6 +48,8 @@
 #include <string.h>
 #include <unistd.h>
 #include <util.h>
+#include <locale.h>
+#include <wchar.h>
 
 #include "ls.h"
 #include "extern.h"
@@ -102,6 +104,10 @@ ls_main(int argc, char *argv[])
        int kflag = 0;
        char *p;
 
+#ifndef SMALL
+       setlocale(LC_ALL, "");
+
+#endif
        /* Terminal defaults to -Cq, non-terminal defaults to -1. */
        if (isatty(STDOUT_FILENO)) {
                if ((p = getenv("COLUMNS")) != NULL)
@@ -396,6 +402,32 @@ traverse(int argc, char *argv[], int opt
                err(1, "fts_read");
 }
 
+#ifndef SMALL
+static int
+mbswidth(const char *s)
+{
+       wchar_t wc;
+       size_t wclen;
+       mbstate_t mbs;
+       int width = 0;
+
+       bzero(&mbs, sizeof(mbs));
+
+       while (*s) {
+               wclen = mbrtowc(&wc, s, MB_CUR_MAX, &mbs);
+               if (wclen < 0 || !iswprint(wc)) {
+                       if (wclen < 0)
+                               wclen = 1;
+                       width++;
+               } else {
+                       width += wcwidth(wc);
+               }
+               s += wclen;
+       }
+       return width;
+}
+#endif
+
 /*
  * Display() takes a linked list of FTSENT structures and passes the list
  * along with any other necessary information to the print function.  P
@@ -458,8 +490,13 @@ display(FTSENT *p, FTSENT *list)
                                continue;
                        }
                }
+#ifndef SMALL
+               if (mbswidth(cur->fts_name) > maxlen)
+                       maxlen = mbswidth(cur->fts_name);
+#else
                if (cur->fts_namelen > maxlen)
                        maxlen = cur->fts_namelen;
+#endif
                if (needstats) {
                        sp = cur->fts_statp;
                        if (sp->st_blocks > maxblock)
Index: util.c
===================================================================
RCS file: /cvs/src/bin/ls/util.c,v
retrieving revision 1.14
diff -u -p -r1.14 util.c
--- util.c      27 Oct 2009 23:59:21 -0000      1.14
+++ util.c      7 Aug 2010 09:16:03 -0000
@@ -41,6 +41,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 
 #include "ls.h"
 #include "extern.h"
@@ -49,9 +50,26 @@ int
 putname(char *name)
 {
        int len;
-
+#ifndef SMALL
+       size_t wclen;
+       wchar_t wc;
+       mbstate_t mbs;
+       
+       bzero(&mbs, sizeof(mbs));
+       for (len = 0; *name; len += wcwidth(wc), name += wclen) {
+               wclen=mbrtowc(&wc, name, MB_CUR_MAX, &mbs);
+               if (wclen < 0) {
+                       wclen = 1;
+                       wc = '?';
+               } else {
+                       wc = (!iswprint(wc) && f_nonprint) ? '?' : wc;
+               }
+               putwchar(wc);
+       }
+#else
        for (len = 0; *name; len++, name++)
                putchar((!isprint(*name) && f_nonprint) ? '?' : *name);
+#endif
        return len;
 }

Reply via email to