Currently, fmt assumes that 1 byte = 1 column, which produces wrongly formatted strings. The attached patch fixes it.
-- Regards Vladimir 'φ-coder/phcoder' Serbinenko
diff --git a/src/fmt.c b/src/fmt.c
index 89d13a6..56f7c0b 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
+#include <wchar.h>
/* Redefine. Otherwise, systems (Unicos for one) with headers that define
it to be a type get syntax errors for the variable declaration below. */
@@ -135,6 +136,7 @@ struct Word
const char *text; /* the text of the word */
int length; /* length of this word */
+ int width;
int space; /* the size of the following space */
unsigned int paren:1; /* starts with open paren */
unsigned int period:1; /* ends in [.?!])* */
@@ -259,6 +261,42 @@ static int next_prefix_indent;
paragraphs chosen by fmt_paragraph(). */
static int last_line_length;
+/* Return the number of display columns occupied by the bytes in
+   [BEG, END), interpreted as multibyte characters.  Scanning stops
+   early at a NUL byte.  The four-byte escape sequences ESC [ 0 m and
+   ESC [ 1 m are skipped and occupy no columns.  Each byte that does
+   not start a valid, complete multibyte character counts as one
+   column.  Characters for which wcwidth returns a negative value
+   occupy no columns.  */
+static size_t
+get_display_width (const char *beg, const char *end)
+{
+  const char *ptr = beg;
+  size_t r = 0;
+  mbstate_t ps;
+
+  memset (&ps, 0, sizeof ps);
+
+  /* Test the bound before dereferencing so that *END is never read.  */
+  while (ptr < end && *ptr)
+    {
+      wchar_t wc;
+      size_t s;
+      int w;
+
+      s = mbrtowc (&wc, ptr, end - ptr, &ps);
+      if (s == (size_t) -1 || s == (size_t) -2)
+        {
+          /* Invalid or truncated sequence: count this byte as one
+             column and restart from the initial conversion state,
+             because PS cannot be reused to decode the next byte.  */
+          memset (&ps, 0, sizeof ps);
+          ptr++;
+          r++;
+          continue;
+        }
+      /* A zero return means a null wide character was decoded; stop
+         here rather than loop without advancing.  */
+      if (s == 0)
+        break;
+      /* Compare the remaining length instead of computing PTR + 3,
+         which could point past the end of the buffer.  */
+      if (wc == '\033' && end - ptr > 3
+          && ptr[1] == '[' && (ptr[2] == '0' || ptr[2] == '1')
+          && ptr[3] == 'm')
+        {
+          ptr += 4;
+          continue;
+        }
+      /* wcwidth returns -1 for non-printable characters; never let
+         that be added to the unsigned total.  */
+      w = wcwidth (wc);
+      if (w > 0)
+        r += w;
+      ptr += s;
+    }
+  return r;
+}
+
void
usage (int status)
{
@@ -669,7 +707,9 @@ get_line (FILE *f, int c)
c = getc (f);
}
while (c != EOF && !isspace (c));
- in_column += word_limit->length = wptr - word_limit->text;
+ word_limit->length = wptr - word_limit->text;
+ in_column += word_limit->width = get_display_width (word_limit->text,
+ wptr);
check_punctuation (word_limit);
/* Scan inter-word space. */
@@ -871,13 +911,13 @@ fmt_paragraph (void)
if (w == word_limit)
break;
- len += (w - 1)->space + w->length; /* w > start >= word */
+ len += (w - 1)->space + w->width; /* w > start >= word */
}
while (len < max_width);
start->best_cost = best + base_cost (start);
}
- word_limit->length = saved_length;
+ word_limit->width = saved_length;
}
/* Return the constant component of the cost of breaking before the
@@ -902,13 +942,13 @@ base_cost (WORD *this)
else if ((this - 1)->punct)
cost -= PUNCT_BONUS;
else if (this > word + 1 && (this - 2)->final)
- cost += WIDOW_COST ((this - 1)->length);
+ cost += WIDOW_COST ((this - 1)->width);
}
if (this->paren)
cost -= PAREN_BONUS;
else if (this->final)
- cost += ORPHAN_COST (this->length);
+ cost += ORPHAN_COST (this->width);
return cost;
}
@@ -983,7 +1023,7 @@ put_word (WORD *w)
s = w->text;
for (n = w->length; n != 0; n--)
putchar (*s++);
- out_column += w->length;
+ out_column += w->width;
}
/* Output to stdout SPACE spaces, or equivalent tabs. */
signature.asc
Description: OpenPGP digital signature
