Rather than printing byte sequences for non-ASCII characters, the
'l' print format now displays printable Unicode characters (as
determined by isprintrune()) normally. The usual \$, \t, \b and \\
escapes are still displayed, but any other non-printing character is
replaced with a Unicode escape (\uXXXX) instead of a \x byte escape.
This may be controversial, as it contradicts POSIX. Rationale:
* Replacing printable non-ASCII runes with byte sequences
  is pointless. There is no reason to escape a character
  merely because it is multibyte.
* Raw UTF-8 byte sequences should not be printed for
  non-printing runes. It is far more useful to decode the
  sequence and print the Unicode code point: '\u2028' is much
  easier to understand than '\xe2\x80\xa8', because the reader
  is not forced to decode the transformation format by hand.
---
ed.c | 72 +++++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 39 insertions(+), 33 deletions(-)
diff --git a/ed.c b/ed.c
index 4b28848..e737d57 100644
--- a/ed.c
+++ b/ed.c
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
+#include "utf.h"
#include "util.h"
#define REGEXSIZE 100
@@ -653,48 +654,53 @@ doread(const char *fname)
}
static void
+lprint(char *s)	/* write s in 'l' format, up to but not including '\n' */
+{
+	int size;	/* chartorune()'s return value; how far s advances */
+	Rune r;
+
+	while ((size = chartorune(&r, s)) > 0 && r != '\n') {
+		switch (r) {
+		case '$':
+			fputs("\\$", stdout);
+			break;
+		case '\t':
+			fputs("\\t", stdout);
+			break;
+		case '\b':
+			fputs("\\b", stdout);
+			break;
+		case '\\':
+			fputs("\\\\", stdout);
+			break;
+		default:
+			if (isprintrune(r))
+				fputrune(&r, stdout);
+			else	/* no 16-bit mask: runes above U+FFFF keep every digit */
+				printf(r > 0xFFFF ? "\\U%08x" : "\\u%04x", (unsigned)r);
+		}
+		s += size;
+	}
+}
+
+static void /* doprint: print lines line1..line2 according to pflag */
doprint(void)
{
- int i, c;
- char *s, *str;
+ int i; /* number of the line being printed */
+ char *s; /* text of line i as returned by gettxt() */
if (line1 <= 0 || line2 > lastln)
error("incorrect address");
for (i = line1; i <= line2; ++i) {
if (pflag == 'n')
printf("%d\t", i);
- for (s = gettxt(i); (c = *s) != '\n'; ++s) {
- if (pflag != 'l')
- goto print_char;
- switch (c) {
- case '$':
- str = "\\$";
- goto print_str;
- case '\t':
- str = "\\t";
- goto print_str;
- case '\b':
- str = "\\b";
- goto print_str;
- case '\\':
- str = "\\\\";
- goto print_str;
- default:
- if (!isprint(c)) {
- printf("\\x%x", 0xFF & c);
- break;
- }
- print_char:
- putchar(c);
- break;
- print_str:
- fputs(str, stdout);
- break;
- }
+ s = gettxt(i);
+ if (pflag == 'l') {
+ lprint(s); /* escaped form; stops before the '\n' */
+ fputs("$\n", stdout); /* '$' marks the end of the line */
+ } else {
+ fputs(s, stdout); /* NOTE(review): the old loop stopped at '\n' and printed it itself; this reads to the NUL, so it assumes gettxt() returns "...\n\0" -- confirm */
}
- if (pflag == 'l')
- fputs("$", stdout);
- putc('\n', stdout);
}
curln = i - 1;
}
--
2.9.0