Until now, the prefixes and suffixes for custom currency formats
(CCA, etc.) have been considered to occupy one display column per
byte.  This is fine for prefixes and suffixes like "$" or "%", but
falls down badly with U+00A5 (¥) or U+20AC (€), which occupy two
or three bytes, respectively, in UTF-8, while occupying only a
single display column.

This commit fixes the problem.  It doesn't add a test yet because
there are still some higher-level issues, but that will come in
a later commit when those remaining issues are resolved.
---
 Smake               |    1 +
 src/data/data-out.c |   26 +++++++++++++++-----------
 src/data/format.c   |   26 ++++++++++++++++++--------
 src/data/format.h   |   12 +++++++++++-
 4 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/Smake b/Smake
index 6d54f5b..2210e66 100644
--- a/Smake
+++ b/Smake
@@ -78,6 +78,7 @@ GNULIB_MODULES = \
        unistr/u8-mbtouc \
        unistr/u8-strlen \
        unistr/u8-strncat \
+       uniwidth/u8-strwidth \
        unitypes \
        unlocked-io \
        vasprintf-posix \
diff --git a/src/data/data-out.c b/src/data/data-out.c
index a30e7e7..bb77437 100644
--- a/src/data/data-out.c
+++ b/src/data/data-out.c
@@ -131,11 +131,12 @@ char *
 data_out_pool (const union value *input, const char *encoding,
               const struct fmt_spec *format, struct pool *pool)
 {
+  const struct fmt_number_style *style = settings_get_style (format->type);
   char *output;
   char *t ;
   assert (fmt_check_output (format));
 
-  output = xmalloc (format->w + 1);
+  output = xmalloc (format->w + style->extra_bytes + 1);
 
   converters[format->type] (input, format, output);
 
@@ -602,9 +603,9 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format,
          the negative suffix, plus (if negative) the negative
          prefix. */
       width = rounder_width (r, decimals, &integer_digits, &add_neg_prefix);
-      width += strlen (style->neg_suffix.s);
+      width += style->neg_suffix.width;
       if (add_neg_prefix)
-        width += strlen (style->neg_prefix.s);
+        width += style->neg_prefix.width;
       if (width > format->w)
         continue;
 
@@ -659,8 +660,11 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format,
       if (add_neg_prefix)
         p = stpcpy (p, style->neg_suffix.s);
       else
-        p = mempset (p, ' ', strlen (style->neg_suffix.s));
-      assert (p == output + format->w);
+        p = mempset (p, ' ', style->neg_suffix.width);
+
+      assert (p >= output + format->w);
+      assert (p <= output + format->w + style->extra_bytes);
+      *p = '\0';
 
       return true;
     }
@@ -681,9 +685,9 @@ output_scientific (double number, const struct fmt_spec *format,
   char buf[64], *p;
 
   /* Allocate minimum required space. */
-  width = 6 + strlen (style->neg_suffix.s);
+  width = 6 + style->neg_suffix.width;
   if (number < 0)
-    width += strlen (style->neg_prefix.s);
+    width += style->neg_prefix.width;
   if (width > format->w)
     return false;
 
@@ -739,11 +743,11 @@ output_scientific (double number, const struct fmt_spec *format,
   if (number < 0)
     p = stpcpy (p, style->neg_suffix.s);
   else
-    p = mempset (p, ' ', strlen (style->neg_suffix.s));
+    p = mempset (p, ' ', style->neg_suffix.width);
 
-  assert (p == buf + format->w);
-  memcpy (output, buf, format->w);
-  output[format->w] = '\0';
+  assert (p >= output + format->w);
+  assert (p <= output + format->w + style->extra_bytes);
+  *p = '\0';
 
   return true;
 }
diff --git a/src/data/format.c b/src/data/format.c
index d3c6880..95e87a0 100644
--- a/src/data/format.c
+++ b/src/data/format.c
@@ -20,6 +20,7 @@
 
 #include <ctype.h>
 #include <stdlib.h>
+#include <uniwidth.h>
 
 #include "data/identifier.h"
 #include "data/settings.h"
@@ -113,7 +114,7 @@ fmt_settings_get_style (const struct fmt_settings *settings,
 
 /* Sets the number style for TYPE to have the given DECIMAL and GROUPING
    characters, negative prefix NEG_PREFIX, prefix PREFIX, suffix SUFFIX, and
-   negative suffix NEG_SUFFIX. */
+   negative suffix NEG_SUFFIX.  All of the strings are UTF-8 encoded. */
 void
 fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
                         char decimal, char grouping,
@@ -121,6 +122,7 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
                         const char *suffix, const char *neg_suffix)
 {
   struct fmt_number_style *style = &settings->styles[type];
+  int total_bytes, total_width;
 
   assert (grouping == '.' || grouping == ',' || grouping == 0);
   assert (decimal == '.' || decimal == ',');
@@ -134,6 +136,12 @@ fmt_settings_set_style (struct fmt_settings *settings, enum fmt_type type,
   fmt_affix_set (&style->neg_suffix, neg_suffix);
   style->decimal = decimal;
   style->grouping = grouping;
+
+  total_bytes = (strlen (neg_prefix) + strlen (prefix)
+                 + strlen (suffix) + strlen (neg_suffix));
+  total_width = (style->neg_prefix.width + style->prefix.width
+                 + style->suffix.width + style->neg_suffix.width);
+  style->extra_bytes = MAX (0, total_bytes - total_width);
 }
 
 /* Sets the decimal point character for the settings in S to DECIMAL.
@@ -934,11 +942,12 @@ max_digits_for_bytes (int bytes)
   return map[bytes - 1];
 }
 
-/* Sets AFFIX's string value to S. */
+/* Sets AFFIX's string value to S, a UTF-8 encoded string. */
 static void
 fmt_affix_set (struct fmt_affix *affix, const char *s)
 {
   affix->s = s[0] == '\0' ? CONST_CAST (char *, "") : xstrdup (s);
+  affix->width = u8_strwidth (CHAR_CAST (const uint8_t *, s), "UTF-8");
 }
 
 /* Frees data in AFFIX. */
@@ -970,6 +979,7 @@ fmt_number_style_clone (struct fmt_number_style *new,
   fmt_affix_set (&new->neg_suffix, old->neg_suffix.s);
   new->decimal = old->decimal;
   new->grouping = old->grouping;
+  new->extra_bytes = old->extra_bytes;
 }
 
 /* Destroys a struct fmt_number_style. */
@@ -985,20 +995,20 @@ fmt_number_style_destroy (struct fmt_number_style *style)
     }
 }
 
-/* Returns the total width of the standard prefix and suffix for
-   STYLE. */
+/* Returns the total width of the standard prefix and suffix for STYLE, in
+   display columns (e.g. as returned by u8_strwidth()). */
 int
 fmt_affix_width (const struct fmt_number_style *style)
 {
-  return strlen (style->prefix.s) + strlen (style->suffix.s);
+  return style->prefix.width + style->suffix.width;
 }
 
-/* Returns the total width of the negative prefix and suffix for
-   STYLE. */
+/* Returns the total width of the negative prefix and suffix for STYLE, in
+   display columns (e.g. as returned by u8_strwidth()). */
 int
 fmt_neg_affix_width (const struct fmt_number_style *style)
 {
-  return strlen (style->neg_prefix.s) + strlen (style->neg_suffix.s);
+  return style->neg_prefix.width + style->neg_suffix.width;
 }
 
 /* Returns the struct fmt_desc for the given format TYPE. */
diff --git a/src/data/format.h b/src/data/format.h
index 55643ab..7df3744 100644
--- a/src/data/format.h
+++ b/src/data/format.h
@@ -146,7 +146,8 @@ void fmt_settings_set_style (struct fmt_settings *, enum fmt_type,
 /* A prefix or suffix for a numeric output format. */
 struct fmt_affix
   {
-    char *s;                    /* String contents of affix. */
+    char *s;                    /* String contents of affix, in UTF-8. */
+    int width;                  /* Display width in columns (see wcwidth()). */
   };
 
 /* A numeric output style. */
@@ -158,6 +159,15 @@ struct fmt_number_style
     struct fmt_affix neg_suffix; /* Negative suffix. */
     char decimal;                /* Decimal point: '.' or ','. */
     char grouping;               /* Grouping character: ',', '.', or 0. */
+
+    /* A fmt_affix may require more bytes than its display width; for example,
+       U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column.
+       This member is the sum of the number of bytes required by all of the
+       fmt_affix members in this struct, minus their display widths.  Thus, it
+       can be used to size memory allocations: for example, the formatted
+       result of CCA20.5 requires no more than (20 + extra_bytes) bytes in
+       UTF-8. */
+    int extra_bytes;
   };
 
 int fmt_affix_width (const struct fmt_number_style *);
-- 
1.7.2.3


_______________________________________________
pspp-dev mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/pspp-dev

Reply via email to