Hi,
I wonder if there are any plans to add multibyte support to ls(1)?
Or maybe there's a reason why it's not a great idea (which I am not
aware of)?
Anyway, here's a patch I have. It's based on DragonFlyBSD's ls.
diff -u ls/ls.c ls/ls.c
--- ls/ls.c Wed Nov 24 17:39:05 2010
+++ ls/ls.c Tue Jan 4 19:44:35 2011
@@ -42,6 +42,7 @@
#include <errno.h>
#include <fts.h>
#include <grp.h>
+#include <locale.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -101,6 +102,8 @@
int ch, fts_options, notused;
int kflag = 0;
char *p;
+
+ setlocale(LC_ALL, "");
/* Terminal defaults to -Cq, non-terminal defaults to -1. */
if (isatty(STDOUT_FILENO)) {
diff -u ls/util.c ls/util.c
--- ls/util.c Wed Nov 24 17:39:05 2010
+++ ls/util.c Tue Jan 4 21:04:22 2011
@@ -35,12 +35,14 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/limits.h>
#include <ctype.h>
#include <fts.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#include "ls.h"
#include "extern.h"
@@ -48,11 +50,44 @@
int
putname(char *name)
{
- int len;
+ mbstate_t mbs;
+ wchar_t wc;
+ int i, len;
+ size_t clen;
- for (len = 0; *name; len++, name++)
- putchar((!isprint(*name) && f_nonprint) ? '?' : *name);
- return len;
+ memset(&mbs, 0, sizeof(mbs));
+ len = 0;
+ while ((clen = mbrtowc(&wc, name, MB_LEN_MAX, &mbs)) != 0) {
+ if (clen == (size_t)-1) {
+ if (f_nonprint)
+ putchar('?');
+ else
+ putchar((unsigned char)*name);
+ name++;
+ len++;
+ memset(&mbs, 0, sizeof(mbs));
+ continue;
+ }
+ if (clen == (size_t)-2) {
+ if (f_nonprint) {
+ putchar('?');
+ len++;
+ } else
+ len += printf("%s", name);
+ break;
+ }
+ if (f_nonprint && !iswprint(wc)) {
+ putchar('?');
+ name += clen;
+ len++;
+ continue;
+ }
+ for (i = 0; i < (int)clen; i++)
+ putchar((unsigned char)name[i]);
+ name += clen;
+ len += wcwidth(wc);
+ }
+ return (len);
}
void