mbstowcs() doesn't NUL-terminate the output string when a UTF-8 locale is
loaded, the "n" parameter is one greater than the length of the input string
("s"), and the input string is all ASCII.
The failure happens because the "fast path" for ASCII only handles
characters > 0, and "n" gets decremented to 0 after the loop test and is
then passed to _citrus_utf8_ctype_mbrtowc(), which fails with "Incomplete
multibyte sequence".
Included is my test program (also tested with postgresql).
(I added the code in the first hunk of the patch so that the routine doesn't
depend on how _citrus_utf8_ctype_mbrtowc() returns on error. It doesn't seem
to be strictly necessary.)
I am running "OpenBSD venvis.cove-mtn.com 4.8 GENERIC.MP#9 amd64" with 6
cores, 16 GB of memory and big mem enabled.
Brad
mbtest.c
---------------------------------
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <locale.h>
/* Multibyte buffer: holds the input string, later reused as wcstombs() output. */
static char mbs[1024];
/* Wide-character buffer filled by mbstowcs(). */
static wchar_t ws[1024];
/* Return value of the most recent conversion call; initialised to -100. */
static int i = -100;
/* Conversion limit: 1 bigger than strlen("Testing"), i.e. room for the NUL. */
static int len = 8;

/*
 * Round-trip the ASCII string "Testing" through mbstowcs() and wcstombs()
 * with a limit of exactly strlen + 1, printing each return value and the
 * converted data.
 *
 * When run with any command-line argument the "en_US.UTF-8" locale is
 * requested first; without arguments the default "C" locale stays in effect.
 *
 * Both buffers are pre-filled with non-zero bytes so that a missing NUL
 * terminator written by the conversion routines is visible in the output.
 * Because of that, every read of the buffers below is bounded explicitly:
 * an unterminated result must not make this program run off the end of
 * its own arrays.
 *
 * Always returns 0.
 */
int
main(int argc, char **argv)
{
	const size_t ws_count = sizeof(ws) / sizeof(ws[0]);
	size_t k;
	int j;

	if (argc > 1)
		setlocale(LC_ALL, "en_US.UTF-8");

	/* Fill with spaces so no accidental NUL follows the copied string. */
	memset(mbs, ' ', sizeof(mbs));
	strcpy(mbs, "Testing");
	j = (int)strlen(mbs);
	printf("LEN: %d\n", j);

	/* Non-zero fill: the only zero wchar_t in ws must come from mbstowcs(). */
	memset(ws, ' ', sizeof(ws));
	i = (int)mbstowcs(ws, mbs, len);
	printf("mbstowcs = %d\n", i);

	/*
	 * Dump the wide characters up to the terminator.  The index bound keeps
	 * the loop inside ws when mbstowcs() failed to NUL-terminate.
	 */
	for (k = 0; k < ws_count && ws[k] != 0; k++)
		printf("%2d ", (int)ws[k]);
	printf("\n");

	/* Non-zero fill again, so a missing terminator from wcstombs() shows. */
	memset(mbs, '-', sizeof(mbs));
	i = (int)wcstombs(mbs, ws, len);
	printf("wcstombs = %d\n", i);

	/*
	 * Guarantee termination before printing: if wcstombs() failed or did
	 * not write a NUL, mbs would otherwise be 1024 dashes with no end.
	 */
	mbs[sizeof(mbs) - 1] = '\0';
	printf("%s\n", mbs);
	return 0;
}
cvs server: Diffing inside .
Index: citrus_utf8.c
===================================================================
RCS file: /cvs/src/lib/libc/citrus/citrus_utf8.c,v
retrieving revision 1.3
diff -u citrus_utf8.c
--- citrus_utf8.c 5 Aug 2010 17:13:53 -0000 1.3
+++ citrus_utf8.c 29 Mar 2011 23:07:38 -0000
@@ -217,6 +217,8 @@
* excluding NUL.
*/
nb = 1;
+ } else if (*src == 0) {
+ return (nchr);
} else {
nb = _citrus_utf8_ctype_mbrtowc(&wc, src,
_CITRUS_UTF8_MB_CUR_MAX, us);
@@ -252,6 +254,9 @@
*/
*pwcs = (wchar_t)*src;
nb = 1;
+ } else if (*src == 0) {
+ *pwcs = 0;
+ break;
} else {
nb = _citrus_utf8_ctype_mbrtowc(pwcs, src, n, us);
if (nb == (size_t)-1) {