Hiroki Sato <[email protected]> wrote in <[email protected]>:
hr> 後藤大地 <[email protected]> wrote hr> in <[email protected]>: hr> hr> da> hr> da> hr> da> > 2018/07/02 15:55、Hiroki Sato <[email protected]>のメール: hr> da> > hr> da> > Eitan Adler <[email protected]> wrote hr> da> > in <CAF6rxg=Zjkf6EbSgt1fBQBUDHGKWwLf=n9zjwejh+di800k...@mail.gmail.com>: hr> da> > hr> da> > li> On 1 July 2018 at 10:08, Conrad Meyer <[email protected]> wrote: hr> da> > li> > Hi Daichi, hr> da> > li> > hr> da> > li> > hr> da> > li> > hr> da> > li> > I don't think code to decode UTF-8 belongs in top(1). I don't know hr> da> > li> > what the goal of this routine is, but I doubt this is the right way to hr> da> > li> > accomplish it. hr> da> > li> hr> da> > li> For the record, I agree. This is why I didn't click "accept" on the hr> da> > li> revision. I don't fully oppose leaving it in top(1) for now as we work hr> da> > li> out the API, but long term its the wrong place. hr> da> > li> hr> da> > li> https://reviews.freebsd.org/D16058 is the review. hr> da> > hr> da> > I strongly object this kind of encoding-specific routine. Please hr> da> > back out it. The problem is that top(1) does not support multibyte hr> da> > encoding in functions for printing, and using C99 wide/multibyte hr> da> > character manipulation API such as iswprint(3) is the way to solve hr> da> > it. Doing getenv("LANG") and assuming an encoding based on it is a hr> da> > very bad practice to internationalize software. hr> da> > hr> da> > -- Hiroki hr> da> hr> da> I respect what you mean. hr> da> hr> da> Once I back out, I will begin implementing it in a different way. hr> da> Please advise which function should be used for implementation hr> da> (iswprint (3) and what other functions should be used?) hr> hr> Roughly speaking, POSIX/XPG/C99 I18N model requires the following hr> steps: (snip) Are you going to back out r335836, or disagree about it? If there is no objection in the next 24 hours, I will commit the attached patch. This should be a minimal change to support multibyte characters in ARGV array depending on LC_CTYPE, not limited to UTF-8. -- Hiroki
Index: usr.bin/top/display.c
===================================================================
--- usr.bin/top/display.c (revision 335957)
+++ usr.bin/top/display.c (working copy)
@@ -1248,55 +1248,6 @@
}
}
-/*
- * printable(str) - make the string pointed to by "str" into one that is
- * printable (i.e.: all ascii), by converting all non-printable
- * characters into '?'. Replacements are done in place and a pointer
- * to the original buffer is returned.
- */
-
-char *
-printable(char str[])
-{
- char *ptr;
- char ch;
-
- ptr = str;
- if (utf8flag) {
- while ((ch = *ptr) != '\0') {
- if (0x00 == (0x80 & ch)) {
- if (!isprint(ch)) {
- *ptr = '?';
- }
- ++ptr;
- } else if (0xC0 == (0xE0 & ch)) {
- ++ptr;
- if ('\0' != *ptr) ++ptr;
- } else if (0xE0 == (0xF0 & ch)) {
- ++ptr;
- if ('\0' != *ptr) ++ptr;
- if ('\0' != *ptr) ++ptr;
- } else if (0xF0 == (0xF8 & ch)) {
- ++ptr;
- if ('\0' != *ptr) ++ptr;
- if ('\0' != *ptr) ++ptr;
- if ('\0' != *ptr) ++ptr;
- } else {
- *ptr = '?';
- ++ptr;
- }
- }
- } else {
- while ((ch = *ptr) != '\0') {
- if (!isprint(ch)) {
- *ptr = '?';
- }
- ptr++;
- }
- }
- return(str);
-}
-
void
i_uptime(struct timeval *bt, time_t *tod)
{
Index: usr.bin/top/display.h
===================================================================
--- usr.bin/top/display.h (revision 335957)
+++ usr.bin/top/display.h (working copy)
@@ -11,7 +11,6 @@
void clear_message(void);
int display_resize(void);
void i_header(const char *text);
-char *printable(char *string);
void display_header(int t);
int display_init(struct statics *statics);
void i_arc(int *stats);
Index: usr.bin/top/machine.c
===================================================================
--- usr.bin/top/machine.c (revision 335957)
+++ usr.bin/top/machine.c (working copy)
@@ -986,13 +986,8 @@
if (*src == '\0')
continue;
len = (argbuflen - (dst - argbuf) - 1) / 4;
- if (utf8flag) {
- utf8strvisx(dst, src, MIN(strlen(src), len));
- } else {
- strvisx(dst, src,
- MIN(strlen(src), len),
- VIS_NL | VIS_CSTYLE);
- }
+ strvisx(dst, src, MIN(strlen(src), len),
+ VIS_NL | VIS_CSTYLE | VIS_OCTAL);
while (*dst != '\0')
dst++;
if ((argbuflen - (dst - argbuf) - 1) / 4 > 0)
@@ -1089,7 +1084,7 @@
sbuf_printf(procbuf, "%6s ", format_time(cputime));
sbuf_printf(procbuf, "%6.2f%% ", ps.wcpu ? 100.0 * weighted_cpu(PCTCPU(pp), pp) : 100.0 * PCTCPU(pp));
}
- sbuf_printf(procbuf, "%s", printable(cmdbuf));
+ sbuf_printf(procbuf, "%s", cmdbuf);
free(cmdbuf);
return (sbuf_data(procbuf));
}
Index: usr.bin/top/top.c
===================================================================
--- usr.bin/top/top.c (revision 335957)
+++ usr.bin/top/top.c (working copy)
@@ -24,6 +24,7 @@
#include <errno.h>
#include <getopt.h>
#include <jail.h>
+#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -69,7 +70,6 @@
struct process_select ps;
const char * myname = "top";
pid_t mypid;
-bool utf8flag = false;
/* pointers to display routines */
static void (*d_loadave)(int mpid, double *avenrun) = i_loadave;
@@ -276,6 +276,11 @@
}
}
+ if (setlocale(LC_ALL, "") == NULL) {
+ fprintf(stderr, "%s: invalid locale.\n", myname);
+ exit(1);
+ }
+
mypid = getpid();
/* get our name */
@@ -607,14 +612,6 @@
fputc('\n', stderr);
}
- /* check if you are using UTF-8 */
- char *env_lang;
- if (NULL != (env_lang = getenv("LANG")) &&
- 0 != strcmp(env_lang, "") &&
- NULL != strstr(env_lang, "UTF-8")) {
- utf8flag = true;
- }
-
restart:
/*
Index: usr.bin/top/top.h
===================================================================
--- usr.bin/top/top.h (revision 335957)
+++ usr.bin/top/top.h (working copy)
@@ -8,7 +8,6 @@
#define TOP_H
#include <unistd.h>
-#include <stdbool.h>
/* Number of lines of header information on the standard screen */
extern int Header_lines;
@@ -38,7 +37,6 @@
extern int pcpu_stats;
extern int overstrike;
extern pid_t mypid;
-extern bool utf8flag;
extern const char * myname;
Index: usr.bin/top/utils.c
===================================================================
--- usr.bin/top/utils.c (revision 335957)
+++ usr.bin/top/utils.c (working copy)
@@ -2,7 +2,6 @@
* This program may be freely redistributed,
* but this entire comment MUST remain intact.
*
- * Copyright (c) 2018, Daichi Goto
* Copyright (c) 2018, Eitan Adler
* Copyright (c) 1984, 1989, William LeFebvre, Rice University
* Copyright (c) 1989, 1990, 1992, William LeFebvre, Northwestern University
@@ -329,61 +328,3 @@
kvm_close(kd);
return ret;
}
-
-/*
- * utf8strvisx(dst,src,src_len)
- * strvisx(dst,src,src_len,VIS_NL|VIS_CSTYLE) coresponding to UTF-8.
- */
-static const char *vis_encodes[] = {
- "\\0", "\\^A", "\\^B", "\\^C", "\\^D", "\\^E", "\\^F", "\\a",
- "\\b", "\t", "\\n", "\\v", "\\f", "\\r", "\\^N", "\\^O", "\\^P",
- "\\^Q", "\\^R", "\\^S", "\\^T", "\\^U", "\\^V", "\\^W", "\\^X",
- "\\^Y", "\\^Z", "\\^[", "\\^\\", "\\^]", "\\^^", "\\^_"
-};
-
-int
-utf8strvisx(char *dst, const char *src, size_t src_len)
-{
- const signed char *src_p;
- char *dst_p;
- int i, j, olen, len;
-
- src_p = src;
- dst_p = dst;
- i = olen = 0;
- len = (int)src_len;
- while (i < len) {
- if (0x00 == (0x80 & *src_p)) {
- if (0 <= *src_p && *src_p <= 31) {
- j = strlen(vis_encodes[(int)*src_p]);
- strcpy(dst_p, vis_encodes[(int)*src_p]);
- dst_p += j;
- olen += j;
- } else if (127 == *src_p) {
- strcpy(dst_p, "\\^?");
- olen += 3;
- } else {
- *dst_p++ = *src_p;
- ++olen;
- }
- ++i;
- ++src_p;
- } else if (0xC0 == (0xE0 & *src_p)) {
- *dst_p++ = *src_p++; ++i; ++olen;
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- } else if (0xE0 == (0xF0 & *src_p)) {
- *dst_p++ = *src_p++; ++i; ++olen;
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- } else if (0xF0 == (0xF8 & *src_p)) {
- *dst_p++ = *src_p++; ++i; ++olen;
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- if (i < len) { *dst_p++ = *src_p++; ++i; ++olen; }
- } else {
- *dst_p++ = '?'; ++i; ++olen;
- }
- }
-
- return olen;
-}
Index: usr.bin/top/utils.h
===================================================================
--- usr.bin/top/utils.h (revision 335957)
+++ usr.bin/top/utils.h (working copy)
@@ -22,5 +22,4 @@
char *format_k(int64_t);
int string_index(const char *string, const char * const *array);
int find_pid(pid_t pid);
-int utf8strvisx(char *, const char *, size_t);
pgpQXgkzTpZHx.pgp
Description: PGP signature
