I'm updating this module with a view to fixing alignment issues in df etc. soon.
This fixes an unlikely over-truncation bug and cleans up the interface so that one doesn't have to check for errors in all cases. I've also synced this with the util-linux-ng project for alignment in cal and fdisk et al. there. cheers, Pádraig.
>From 2e06ac467763d043fdf6c5456c3d1a837f0af728 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= <[email protected]> Date: Mon, 15 Mar 2010 14:04:31 +0000 Subject: [PATCH] maint: update the mbsalign module * gl/lib/mbsalign.c (mbsalign): Support the MBA_UNIBYTE_FALLBACK flag which reverts to unibyte mode if one can't allocate memory or if there are invalid multibyte characters present. Note memory is no longer dynamically allocated in unibyte mode so one can assume that mbsalign() will not return an error if this flag is present. Fix an error where we would truncate too many characters in the presence of single byte non printable chars. Suppress a signed/unsigned comparison warning. (ambsalign): A new wrapper function to dynamically allocate the minimum memory required to hold the aligned string. * gl/lib/mbsalign.h: Add the MBA_UNIBYTE_FALLBACK flag and also document others that may be implemented in future. (ambsalign): A prototype for the new wrapper. --- gl/lib/mbsalign.c | 101 +++++++++++++++++++++++++++++++++++++++++----------- gl/lib/mbsalign.h | 23 ++++++++++++ 2 files changed, 102 insertions(+), 22 deletions(-) diff --git a/gl/lib/mbsalign.c b/gl/lib/mbsalign.c index be25956..870f29c 100644 --- a/gl/lib/mbsalign.c +++ b/gl/lib/mbsalign.c @@ -32,6 +32,7 @@ #endif /* Replace non printable chars. + Note \t and \n etc. are non printable. Return 1 if replacement made, 0 otherwise. */ static bool @@ -124,12 +125,12 @@ mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces) the trailing NUL. A return value of DEST_SIZE or larger means there wasn't enough space. DEST will be NUL terminated in any case. Return (size_t) -1 upon error (invalid multi-byte sequence in SRC, - or malloc failure). + or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified. Update *WIDTH to indicate how many columns were used before padding. 
*/ size_t mbsalign (const char *src, char *dest, size_t dest_size, - size_t *width, mbs_align_t align, int flags _UNUSED_PARAMETER_) + size_t *width, mbs_align_t align, int flags) { size_t ret = -1; size_t src_size = strlen (src) + 1; @@ -149,12 +150,22 @@ mbsalign (const char *src, char *dest, size_t dest_size, { size_t src_chars = mbstowcs (NULL, src, 0); if (src_chars == (size_t) -1) - goto mbsalign_cleanup; + { + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; + } src_chars += 1; /* make space for NUL */ str_wc = malloc (src_chars * sizeof (wchar_t)); if (str_wc == NULL) - goto mbsalign_cleanup; - if (mbstowcs (str_wc, src, src_chars) > 0) + { + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; + } + if (mbstowcs (str_wc, src, src_chars) != 0) { str_wc[src_chars - 1] = L'\0'; wc_enabled = true; @@ -165,27 +176,36 @@ mbsalign (const char *src, char *dest, size_t dest_size, /* If we transformed or need to truncate the source string then create a modified copy of it. */ - if (conversion || (n_cols > *width)) + if (wc_enabled && (conversion || (n_cols > *width))) { - newstr = malloc (src_size); - if (newstr == NULL) - goto mbsalign_cleanup; - str_to_print = newstr; - if (wc_enabled) - { - n_cols = wc_truncate (str_wc, *width); - n_used_bytes = wcstombs (newstr, str_wc, src_size); - } - else + if (conversion) + { + /* May have increased the size by converting + \t to \uFFFD for example. 
*/ + src_size = wcstombs(NULL, str_wc, 0) + 1; + } + newstr = malloc (src_size); + if (newstr == NULL) { - n_cols = *width; - n_used_bytes = n_cols; - memcpy (newstr, src, n_cols); - newstr[n_cols] = '\0'; + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; } + str_to_print = newstr; + n_cols = wc_truncate (str_wc, *width); + n_used_bytes = wcstombs (newstr, str_wc, src_size); } - if (*width > n_cols) +mbsalign_unibyte: + + if (n_cols > *width) /* Unibyte truncation required. */ + { + n_cols = *width; + n_used_bytes = n_cols; + } + + if (*width > n_cols) /* Padding required. */ n_spaces = *width - n_cols; /* indicate to caller how many cells needed (not including padding). */ @@ -218,7 +238,8 @@ mbsalign (const char *src, char *dest, size_t dest_size, } dest = mbs_align_pad (dest, dest_end, start_spaces); - dest = mempcpy(dest, str_to_print, MIN (n_used_bytes, dest_end - dest)); + size_t space_left = dest_end - dest; + dest = mempcpy (dest, str_to_print, MIN (n_used_bytes, space_left)); mbs_align_pad (dest, dest_end, end_spaces); } @@ -229,3 +250,39 @@ mbsalign_cleanup: return ret; } + +/* A wrapper around mbsalign() to dynamically allocate the + minimum amount of memory to store the result. + NULL is returned on failure. */ + +char * +ambsalign (const char *src, size_t *width, mbs_align_t align, int flags) +{ + size_t orig_width = *width; + size_t size = *width; /* Start with enough for unibyte mode. */ + size_t req = size; + char *buf = NULL; + + while (req >= size) + { + size = req + 1; /* Space for NUL. 
*/ + char *nbuf = realloc (buf, size); + if (nbuf == NULL) + { + free (buf); + buf = NULL; + break; + } + buf = nbuf; + *width = orig_width; + req = mbsalign (src, buf, size, width, align, flags); + if (req == (size_t) -1) + { + free (buf); + buf = NULL; + break; + } + } + + return buf; +} diff --git a/gl/lib/mbsalign.h b/gl/lib/mbsalign.h index a4ec693..41bd490 100644 --- a/gl/lib/mbsalign.h +++ b/gl/lib/mbsalign.h @@ -18,6 +18,29 @@ typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t; +enum { + /* Use unibyte mode for invalid multibyte strings + or when heap memory is exhausted. */ + MBA_UNIBYTE_FALLBACK = 0x0001, + +#if 0 /* Other possible options. */ + /* Skip invalid multibyte chars rather than failing */ + MBA_IGNORE_INVALID = 0x0002, + + /* Align multibyte strings using "figure space" (\u2007) */ + MBA_USE_FIGURE_SPACE = 0x0004, + + /* Don't add any padding */ + MBA_TRUNCATE_ONLY = 0x0008, + + /* Don't truncate */ + MBA_PAD_ONLY = 0x0010, +#endif +}; + size_t mbsalign (const char *src, char *dest, size_t dest_size, size_t *width, mbs_align_t align, int flags); + +char * +ambsalign (const char *src, size_t *width, mbs_align_t align, int flags); -- 1.6.2.5
