Kalle Olavi Niemitalo <k...@iki.fi> writes:

> The attached patch for elinks-0.12
> (20dfdb284f9a23742800fb5b4023bef54c6ad982) implements this, but
> I'm not sure it is the right solution, because e.g. KOI8-R also
> supports line-drawing characters so the fix should preferably
> not be specific to UTF-8.

The attached new version of the patch makes elinks --dump use
line-drawing characters in all charsets where they are available.
I think this should be applied to master after the prerequisites
have been merged from elinks-0.12.

From 0dc9ac1a83f7a7d228476a48ffff29c4b2a55daf Mon Sep 17 00:00:00 2001
From: Kalle Olavi Niemitalo <k...@iki.fi>
Date: Sun, 21 Jun 2009 18:30:36 +0300
Subject: [PATCH] dump: Use box-drawing characters if available

---
 NEWS                               |    2 +
 src/viewer/dump/dump-specialized.h |    4 +-
 src/viewer/dump/dump.c             |   80 ++++++++++++++++++++++++++++++++++--
 3 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/NEWS b/NEWS
index 166539a..e135750 100644
--- a/NEWS
+++ b/NEWS
@@ -88,6 +88,8 @@ includes the changes listed under ``ELinks 0.11.6.GIT now'' below.
 * minor bug 1017: To work around HTTP server bugs, disable
   protocol.http.compression by default, until ELinks can report
   decompression errors or automatically retry the connection.
+* enhancement: ``elinks --dump'' uses box-drawing characters if supported
+  by the charset.
 
 Bugs that should be removed from NEWS before the 0.12.0 release:
 
diff --git a/src/viewer/dump/dump-specialized.h b/src/viewer/dump/dump-specialized.h
index 1afbea7..b8166f7 100644
--- a/src/viewer/dump/dump-specialized.h
+++ b/src/viewer/dump/dump-specialized.h
@@ -126,8 +126,8 @@ DUMP_FUNCTION_SPECIALIZED(struct document *document, struct dump_output *out)
 #endif /* DUMP_COLOR_MODE_TRUE */
 
                        if ((attr & SCREEN_ATTR_FRAME)
-                           && c >= 176 && c < 224)
-                               c = frame_dumb[c - 176];
+                           && c >= FRAME_CHARS_BEGIN && c < FRAME_CHARS_END)
+                               c = out->frame[c - FRAME_CHARS_BEGIN];
 
 #ifdef DUMP_CHARSET_UTF8
                        if (!isscreensafe_ucs(c)) c = ' ';
diff --git a/src/viewer/dump/dump.c b/src/viewer/dump/dump.c
index 295a7aa..81d0382 100644
--- a/src/viewer/dump/dump.c
+++ b/src/viewer/dump/dump.c
@@ -53,6 +53,9 @@ static int dump_redir_count = 0;
 
 #define D_BUF  65536
 
+#define FRAME_CHARS_BEGIN 0xB0
+#define FRAME_CHARS_END   0xE0
+
 /** A place where dumping functions write their output.  The data
  * first goes to the buffer in this structure.  When the buffer is
  * full enough, it is flushed to a file descriptor or to a string.  */
@@ -68,10 +71,76 @@ struct dump_output {
         * flushed, or -1.  */
        int fd;
 
+       /** Mapping of SCREEN_ATTR_FRAME characters.  If the target
+        * codepage is UTF-8 (which is possible only if CONFIG_UTF8 is
+        * defined), then the values are UTF-32.  Otherwise, they are
+        * in the target codepage, even though the type may still be
+        * unicode_val_T.  */
+#ifdef CONFIG_UTF8
+       unicode_val_T frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
+#else
+       unsigned char frame[FRAME_CHARS_END - FRAME_CHARS_BEGIN];
+#endif
+
        /** Bytes waiting to be flushed.  */
        unsigned char buf[D_BUF];
 };
 
+/** Mapping from CP437 box-drawing characters to simpler CP437 characters.
+ * - Map mixed light/double lines to light lines or double lines,
+ *   depending on the majority; or to light lines if even.
+ * - Map double lines to light lines.
+ * - Map light and dark shades to medium, then to full blocks.
+ * - Map half blocks to full blocks.
+ * - Otherwise map to ASCII characters.  */
+static const unsigned char frame_simplify[FRAME_CHARS_END - FRAME_CHARS_BEGIN]
+= {
+       /*-0    -1    -2    -3    -4    -5    -6    -7 */
+       /*-8    -9    -A    -B    -C    -D    -E    -F */
+       0xB1, 0xDB, 0xB1, '|' , '+' , 0xB4, 0xB9, 0xBF, /* 0xB0...0xB7 */
+       0xC5, 0xB4, 0xB3, 0xBF, 0xD9, 0xD9, 0xD9, '+' , /* 0xB8...0xBF */
+       '+' , '+' , '+' , '+' , '-' , '+' , 0xC3, 0xCC, /* 0xC0...0xC7 */
+       0xC0, 0xDA, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xCA, /* 0xC8...0xCF */
+       0xC1, 0xCB, 0xC2, 0xC0, 0xC0, 0xDA, 0xDA, 0xC5, /* 0xD0...0xD7 */
+       0xC5, '+' , '+' , '#' , 0xDB, 0xDB, 0xDB, 0xDB  /* 0xD8...0xDF */
+};
+
+static void
+dump_output_prepare_frame(struct dump_output *out, int to_cp)
+{
+       const int cp437 = get_cp_index("cp437");
+       int orig;
+       unsigned char subst;
+
+#ifdef CONFIG_UTF8
+       if (is_cp_utf8(to_cp)) {
+               for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++)
+                       out->frame[orig - FRAME_CHARS_BEGIN]
+                               = cp2u(cp437, orig);
+               return;
+       }
+#endif /* CONFIG_UTF8 */
+
+       for (orig = FRAME_CHARS_BEGIN; orig < FRAME_CHARS_END; orig++) {
+               for (subst = orig;
+                    subst >= FRAME_CHARS_BEGIN && subst < FRAME_CHARS_END;
+                    subst = frame_simplify[subst - FRAME_CHARS_BEGIN]) {
+                       unicode_val_T ucs = cp2u(cp437, subst);
+                       const unsigned char *result = u2cp_no_nbsp(ucs, to_cp);
+
+                       if (result && cp2u(to_cp, result[0]) == ucs
+                           && !result[1]) {
+                               subst = result[0];
+                               break;
+                       }
+                       /* Otherwise, the mapping from ucs to to_cp
+                        * was not accurate, and this loop will try
+                        * a simpler character.  */
+               }
+               out->frame[orig - FRAME_CHARS_BEGIN] = subst;
+       }
+}
+
 /** Allocate and initialize a struct dump_output.
  * The caller should eventually free the structure with mem_free().
  *
@@ -87,7 +156,7 @@ struct dump_output {
  *
  * @relates dump_output */
 static struct dump_output *
-dump_output_alloc(int fd, struct string *string)
+dump_output_alloc(int fd, struct string *string, int cp)
 {
        struct dump_output *out;
 
@@ -99,6 +168,8 @@ dump_output_alloc(int fd, struct string *string)
                out->fd = fd;
                out->string = string;
                out->bufpos = 0;
+
+               dump_output_prepare_frame(out, cp);
        }
        return out;
 }
@@ -285,7 +356,8 @@ dump_references(struct document *document, int fd, unsigned char buf[D_BUF])
 int
 dump_to_file(struct document *document, int fd)
 {
-       struct dump_output *out = dump_output_alloc(fd, NULL);
+       struct dump_output *out = dump_output_alloc(fd, NULL,
+                                                   document->options.cp);
        int error;
 
        if (!out) return -1;
@@ -326,7 +398,7 @@ dump_formatted(int fd, struct download *download, struct cache_entry *cached)
 
        render_document(&vs, &formatted, &o);
 
-       out = dump_output_alloc(fd, NULL);
+       out = dump_output_alloc(fd, NULL, o.cp);
        if (out) {
                int error;
 
@@ -611,7 +683,7 @@ add_document_to_string(struct string *string, struct document *document)
        assert(string && document);
        if_assert_failed return NULL;
 
-       out = dump_output_alloc(-1, string);
+       out = dump_output_alloc(-1, string, document->options.cp);
        if (!out) return NULL;
 
        error = dump_nocolor(document, out);
-- 
1.6.3.2.29.gda779

Attachment: pgp5aA4lIporx.pgp
Description: PGP signature

_______________________________________________
elinks-dev mailing list
elinks-dev@linuxfromscratch.org
http://linuxfromscratch.org/mailman/listinfo/elinks-dev

Reply via email to