Kalle Olavi Niemitalo <k...@iki.fi> writes:
> The attached patch for elinks-0.12
> (20dfdb284f9a23742800fb5b4023bef54c6ad982) implements this, but
> I'm not sure it is the right solution, because e.g. KOI8-R also
> supports line-drawing characters so the fix should preferably
> not be specific to UTF-8.
The attached new version of the patch makes elinks --dump use
line-drawing characters in all charsets where they are available.
I think this should be applied to master after the prerequisites
have been merged from elinks-0.12.
From 0dc9ac1a83f7a7d228476a4829c4b2a55daf Mon Sep 17 00:00:00 2001
From: Kalle Olavi Niemitalo <k...@iki.fi>
Date: Sun, 21 Jun 2009 18:30:36 +0300
Subject: [PATCH] dump: Use box-drawing characters if available
---
NEWS |2 +
src/viewer/dump/dump-specialized.h |4 +-
src/viewer/dump/dump.c | 80 ++--
3 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/NEWS b/NEWS
index 166539a..e135750 100644
--- a/NEWS
+++ b/NEWS
@@ -88,6 +88,8 @@ includes the changes listed under ``ELinks 0.11.6.GIT now''
below.
* minor bug 1017: To work around HTTP server bugs, disable
protocol.http.compression by default, until ELinks can report
decompression errors or automatically retry the connection.
+* enhancement: ``elinks --dump'' uses box-drawing characters if supported
+ by the charset.
Bugs that should be removed from NEWS before the 0.12.0 release:
diff --git a/src/viewer/dump/dump-specialized.h
b/src/viewer/dump/dump-specialized.h
index 1afbea7..b8166f7 100644
--- a/src/viewer/dump/dump-specialized.h
+++ b/src/viewer/dump/dump-specialized.h
@@ -126,8 +126,8 @@ DUMP_FUNCTION_SPECIALIZED(struct document *document, struct
dump_output *out)
#endif /* DUMP_COLOR_MODE_TRUE */
if ((attr & SCREEN_ATTR_FRAME)
-&& c >= 176 && c < 224)
- c = frame_dumb[c - 176];
+&& c >= FRAME_CHARS_BEGIN && c < FRAME_CHARS_END)
+ c = out->frame[c - FRAME_CHARS_BEGIN];
#ifdef DUMP_CHARSET_UTF8
if (!isscreensafe_ucs(c)) c = ' ';
diff --git a/src/viewer/dump/dump.c b/src/viewer/dump/dump.c
index 295a7aa..81d0382 100644
--- a/src/viewer/dump/dump.c
+++ b/src/viewer/dump/dump.c
@@ -53,6 +53,9 @@ static int dump_redir_count = 0;
#define D_BUF 65536
+#define FRAME_CHARS_BEGIN 0xB0
+#define FRAME_CHARS_END 0xE0
+
/** A place where dumping functions write their output. The data
* first goes to the buffer in this structure. When the buffer is
* full enough, it is flushed to a file descriptor or to a string. */
@@ -68,10 +71,76 @@ struct dump_output {
* flushed, or -1. */
int fd;
+ /** Mapping of SCREEN_ATTR_FRAME characters. If the target
+* codepage is UTF-8 (which is possible only if CONFIG_UTF8 is
+* defined), then the values are UTF-32. Otherwise, they are
+* in the target codepage, even though the type may still be
+* unicode_val_T. */
+#ifdef CONFIG_UTF8
+ unicode_val_T frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
+#else
+ unsigned char frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
+#endif
+
/** Bytes waiting to be flushed. */
unsigned char buf[D_BUF];
};
+/** Mapping from CP437 box-drawing characters to simpler CP437 characters.
+ * - Map mixed light/double lines to light lines or double lines,
+ * depending on the majority; or to light lines if even.
+ * - Map double lines to light lines.
+ * - Map light and dark shades to medium, then to full blocks.
+ * - Map half blocks to full blocks.
+ * - Otherwise map to ASCII characters. */
+static const unsigned char frame_simplify[FRAME_CHARS_END - FRAME_CHARS_BEGIN]
+= {
+ /*-0-1-2-3-4-5-6-7 */
+ /*-8-9-A-B-C-D-E-F */
+ 0xB1, 0xDB, 0xB1, '|' , '+' , 0xB4, 0xB9, 0xBF, /* 0xB0...0xB7 */
+ 0xC5, 0xB4, 0xB3, 0xBF, 0xD9, 0xD9, 0xD9, '+' , /* 0xB8...0xBF */
+ '+' , '+' , '+' , '+' , '-' , '+' , 0xC3, 0xCC, /* 0xC0...0xC7 */
+ 0xC0, 0xDA, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xCA, /* 0xC8...0xCF */
+ 0xC1, 0xCB, 0xC2, 0xC0, 0xC0, 0xDA, 0xDA, 0xC5, /* 0xD0...0xD7 */
+ 0xC5, '+' , '+' , '#' , 0xDB, 0xDB, 0xDB, 0xDB /* 0xD8...0xDF */
+};
+
+static void
+dump_output_prepare_frame(struct dump_output *out, int to_cp)
+{
+ const int cp437 = get_cp_index("cp437");
+ int orig;
+ unsigned char subst;
+
+#ifdef CONFIG_UTF8
+ if (is_cp_utf8(to_cp)) {
+ for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++)
+ out->frame[orig - FRAME_CHARS_BEGIN]
+ = cp2u(cp437, orig);
+ return;
+ }
+#endif /* CONFIG_UTF8 */
+
+ for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++) {
+ for (subst = orig;
+