Hiroshi Inoue wrote:
> >>>> I need someone with WIN32 experience to review and test this patch.
> >>> I don't understand why cache_locale_time() works on Windows. It sets
> >>> the LC_CTYPE but does not do any encoding conversion.
> >> Doesn't strftime_win32 do the conversion?
> >
> > Oh, I now see strftime is redefined as a macro in that C file. Thanks.
> >
> >>> Do month and
> >>> day-of-week names not work either, or do they work and the encoding
> >>> conversion for numeric/money, e.g. Euro, is not necessary?
> >> db_strdup does the conversion.
> >
> > Should we pull the encoding conversion into a separate function and have
> > strftime_win32() and db_strdup() both call it?
>
> We may be able to pull the conversion WideChars => UTF8 =>
> a PG encoding into a function.
OK, I have created a new function, win32_wchar_to_db_encoding(), to
share the conversion from wide characters to the database encoding.
New patch attached.
> BTW both PGLC_localeconv() and cache_locale_time() save the current
> LC_CTYPE first and restore it just before returning.
> I'm not sure that is OK when an error occurs in the middle of the functions.
Yea, I added a comment questioning if that is a problem.
--
Bruce Momjian <[email protected]> http://momjian.us
EnterpriseDB http://enterprisedb.com
PG East: http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v
retrieving revision 1.53
diff -c -c -r1.53 pg_locale.c
*** src/backend/utils/adt/pg_locale.c 27 Feb 2010 20:20:44 -0000 1.53
--- src/backend/utils/adt/pg_locale.c 2 Mar 2010 18:11:41 -0000
***************
*** 4,10 ****
*
* Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
*
! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $
*
*-----------------------------------------------------------------------
*/
--- 4,10 ----
*
* Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
*
! * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $
*
*-----------------------------------------------------------------------
*/
***************
*** 96,101 ****
--- 96,109 ----
static char *IsoLocaleName(const char *); /* MSVC specific */
#endif
+ #ifdef WIN32
+ static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf,
+ const size_t wchars, char *dst, size_t dstlen);
+ static char *db_encoding_strdup(const char *item, const char *str);
+ static size_t strftime_win32(char *dst, size_t dstlen, const wchar_t *format,
+ const struct tm *tm);
+ #endif
+
/*
* pg_perm_setlocale
***************
*** 387,392 ****
--- 395,488 ----
}
+ #ifdef WIN32
+ /*
+  * win32_wchar_to_db_encoding
+  *
+  * Convert a wide character string (UTF16 on Win32) to UTF8, and then,
+  * when the database encoding is not UTF8, on to the database encoding.
+  *
+  *	wbuf	wide string to convert; owned by the caller and never freed
+  *			here (strftime_win32() passes a stack array)
+  *	wchars	number of wide characters in wbuf to convert
+  *	dst		output buffer; receives a NUL-terminated string
+  *	dstlen	size of dst in bytes, including room for the terminator
+  *
+  * Returns the length of the result in bytes, like strftime() does, so
+  * strftime_win32() can hand the value straight back to its caller.
+  *
+  * Raises ERROR if the UTF16 => UTF8 step fails or its output leaves no
+  * room for the terminator.  A failure in the UTF8 => database encoding
+  * step is swallowed on purpose and yields an empty string.
+  */
+ static size_t
+ win32_wchar_to_db_encoding(const wchar_t *wbuf, const size_t wchars,
+ 						   char *dst, size_t dstlen)
+ {
+ 	int			db_encoding = GetDatabaseEncoding();
+ 	int			utf8len;
+
+ 	/* Convert wide string (UTF16) to UTF8 */
+ 	utf8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, (int) wchars,
+ 								  dst, (int) dstlen, NULL, NULL);
+ 	if (utf8len == 0)
+ 	{
+ 		/* Does this leave LC_CTYPE set incorrectly? */
+ 		elog(ERROR,
+ 			 "could not convert string %04x to UTF-8: error %lu", wbuf[0], GetLastError());
+ 	}
+
+ 	/*
+ 	 * WideCharToMultiByte() does not NUL-terminate when given an explicit
+ 	 * input length, and with dstlen == 0 it only reports the size needed
+ 	 * without writing anything.  Either way dst[utf8len] would be outside
+ 	 * the buffer, so refuse rather than write past the end.
+ 	 */
+ 	if ((size_t) utf8len >= dstlen)
+ 		elog(ERROR,
+ 			 "could not convert string to UTF-8: destination buffer too small");
+
+ 	dst[utf8len] = '\0';
+
+ 	if (db_encoding != PG_UTF8)
+ 	{
+ 		PG_TRY();
+ 		{
+ 			char	   *convstr;
+
+ 			convstr = pg_do_encoding_conversion(dst, utf8len, PG_UTF8, db_encoding);
+ 			if (dst != convstr)
+ 			{
+ 				/* A new buffer was returned; copy back and release it */
+ 				strlcpy(dst, convstr, dstlen);
+ 				pfree(convstr);
+ 			}
+ 		}
+ 		PG_CATCH();
+ 		{
+ 			/*
+ 			 * Best effort: a string that cannot be represented in the
+ 			 * database encoding becomes empty instead of aborting.
+ 			 */
+ 			FlushErrorState();
+ 			dst[0] = '\0';
+ 		}
+ 		PG_END_TRY();
+ 	}
+
+ 	/* Byte length, not character count, to match strftime() semantics */
+ 	return strlen(dst);
+ }
+
+ /*
+  * db_encoding_strdup
+  *
+  * This converts the LC_CTYPE-encoded string returned from the
+  * locale routines to the database encoding.
+  *
+  *	item	name of the lconv field being converted; not referenced by
+  *			the code here
+  *	str		string in the encoding of the current LC_CTYPE
+  *
+  * Returns a malloc'd string (strdup'd when str is empty).
+  */
+ static char *
+ db_encoding_strdup(const char *item, const char *str)
+ {
+ 	int			db_encoding = GetDatabaseEncoding();
+ 	size_t		wchars,
+ 				ilen,
+ 				wclen,
+ 				dstlen;
+ 	int			bytes_per_char;
+ 	wchar_t    *wbuf;
+ 	char	   *dst;
+
+ 	if (!str[0])
+ 		return strdup(str);
+
+ 	/* allocate wide character string */
+ 	ilen = strlen(str) + 1;
+ 	wclen = ilen * sizeof(wchar_t);
+ 	wbuf = (wchar_t *) palloc(wclen);
+
+ 	/* Convert multi-byte string using current LC_CTYPE to a wide-character string */
+ 	wchars = mbstowcs(wbuf, str, ilen);
+ 	if (wchars == (size_t) -1)
+ 		elog(ERROR,
+ 			 "could not convert string to wide characters: error %lu", GetLastError());
+
+ 	/* allocate target string, sized for the wider of UTF8 and the db encoding */
+ 	bytes_per_char = pg_encoding_max_length(PG_UTF8);
+ 	if (pg_encoding_max_length(db_encoding) > bytes_per_char)
+ 		bytes_per_char = pg_encoding_max_length(db_encoding);
+ 	dstlen = wchars * bytes_per_char + 1;
+ 	if ((dst = malloc(dstlen)) == NULL)
+ 		elog(ERROR, "could not allocate a destination buffer");
+
+ 	/* Convert wide string (UTF16) to db encoding */
+ 	win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen);
+
+ 	/* wbuf was palloc'd here, so it is released here, not in the helper */
+ 	pfree(wbuf);
+
+ 	return dst;
+ }
+ #else
+ /* Without WIN32 no encoding conversion is done; just duplicate the string. */
+ static char *
+ db_encoding_strdup(const char *item, const char *str)
+ {
+ 	return strdup(str);
+ }
+ #endif /* WIN32 */
+
+
/*
* Return the POSIX lconv struct (contains number/money formatting
* information) with locale information for all categories.
***************
*** 398,403 ****
--- 494,502 ----
struct lconv *extlconv;
char *save_lc_monetary;
char *save_lc_numeric;
+ #ifdef WIN32
+ char *save_lc_ctype = NULL;
+ #endif
/* Did we do it already? */
if (CurrentLocaleConvValid)
***************
*** 413,442 ****
if (save_lc_numeric)
save_lc_numeric = pstrdup(save_lc_numeric);
setlocale(LC_MONETARY, locale_monetary);
setlocale(LC_NUMERIC, locale_numeric);
!
! /* Get formatting information */
extlconv = localeconv();
/*
! * Must copy all values since restoring internal settings may overwrite
* localeconv()'s results.
*/
CurrentLocaleConv = *extlconv;
! CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol);
! CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point);
! CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep);
! CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
! CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
! CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign);
! CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign);
CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
! /* Try to restore internal settings */
if (save_lc_monetary)
{
setlocale(LC_MONETARY, save_lc_monetary);
--- 512,588 ----
if (save_lc_numeric)
save_lc_numeric = pstrdup(save_lc_numeric);
+ #ifdef WIN32
+ /*
+ * Ideally, the db server encoding and locale settings would
+ * always match. Unfortunately, WIN32 does not support UTF-8
+ * values for setlocale(), even though PostgreSQL runs fine with
+ * a UTF-8 encoding on Windows:
+ *
+ * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
+ *
+ * Therefore, we must set LC_CTYPE to match LC_NUMERIC and
+ * LC_MONETARY, call localeconv(), and use mbstowcs() to
+ * convert the locale-aware string, e.g. Euro symbol, which
+ * is not in UTF-8 to the server encoding.
+ */
+
+ if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL)
+ {
+ save_lc_ctype = pstrdup(save_lc_ctype);
+ /* Set LC_CTYPE to match LC_MONETARY? */
+ if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0)
+ setlocale(LC_CTYPE, locale_monetary);
+ }
+ #endif
+
setlocale(LC_MONETARY, locale_monetary);
setlocale(LC_NUMERIC, locale_numeric);
! /*
! * Get formatting information for LC_MONETARY, and LC_NUMERIC if they
! * are the same.
! */
extlconv = localeconv();
/*
! * Must copy all values since restoring internal settings might overwrite
* localeconv()'s results.
*/
CurrentLocaleConv = *extlconv;
!
! /* The first argument of db_encoding_strdup() is only used on WIN32 */
! CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol);
! CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol);
! CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point);
CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep);
! CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign);
! CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign);
CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
! #ifdef WIN32
! if (save_lc_ctype && pg_strcasecmp(locale_numeric, locale_monetary) != 0)
! {
! setlocale(LC_CTYPE, locale_numeric);
! /* Get formatting information for LC_NUMERIC with matching LC_CTYPE */
! extlconv = localeconv();
! }
! #endif
!
! CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point);
! CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep);
!
! /*
! * Restore internal settings
! */
! #ifdef WIN32
! if (save_lc_ctype)
! {
! setlocale(LC_CTYPE, save_lc_ctype);
! pfree(save_lc_ctype);
! }
! #endif
if (save_lc_monetary)
{
setlocale(LC_MONETARY, save_lc_monetary);
***************
*** 455,483 ****
#ifdef WIN32
/*
! * On win32, strftime() returns the encoding in CP_ACP, which is likely
! * different from SERVER_ENCODING. This is especially important in Japanese
! * versions of Windows which will use SJIS encoding, which we don't support
! * as a server encoding.
! *
! * Replace strftime() with a version that gets the string in UTF16 and then
! * converts it to the appropriate encoding as necessary.
*
* Note that this only affects the calls to strftime() in this file, which are
* used to get the locale-aware strings. Other parts of the backend use
* pg_strftime(), which isn't locale-aware and does not need to be replaced.
*/
static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm)
{
! size_t len;
wchar_t wbuf[MAX_L10N_DATA];
- int encoding;
! encoding = GetDatabaseEncoding();
!
! len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
! if (len == 0)
/*
* strftime call failed - return 0 with the contents of dst
--- 601,628 ----
#ifdef WIN32
/*
! * On WIN32, strftime() returns the encoding in CP_ACP (the default
! * operating system codepage for that computer), which is likely different
! * from SERVER_ENCODING. This is especially important in Japanese versions
! * of Windows which will use SJIS encoding, which we don't support as a
! * server encoding.
! *
! * So, instead of using strftime(), use wcsftime() to return the value in
! * wide characters (internally UTF16) and then convert it to the appropriate
! * database encoding.
*
* Note that this only affects the calls to strftime() in this file, which are
* used to get the locale-aware strings. Other parts of the backend use
* pg_strftime(), which isn't locale-aware and does not need to be replaced.
*/
static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm *tm)
{
! size_t wchars;
wchar_t wbuf[MAX_L10N_DATA];
! wchars = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
! if (wchars == 0)
/*
* strftime call failed - return 0 with the contents of dst
***************
*** 485,511 ****
*/
return 0;
! len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
! if (len == 0)
! elog(ERROR,
! "could not convert string to UTF-8:error %lu", GetLastError());
!
! dst[len] = '\0';
! if (encoding != PG_UTF8)
! {
! char *convstr = pg_do_encoding_conversion(dst, len, PG_UTF8, encoding);
!
! if (dst != convstr)
! {
! strlcpy(dst, convstr, dstlen);
! len = strlen(dst);
! }
! }
!
! return len;
}
#define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
#endif /* WIN32 */
--- 630,641 ----
*/
return 0;
! return win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen);
}
+ /* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
+
#endif /* WIN32 */
***************
*** 533,542 ****
elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
#ifdef WIN32
! /* set user's value of ctype locale */
save_lc_ctype = setlocale(LC_CTYPE, NULL);
if (save_lc_ctype)
save_lc_ctype = pstrdup(save_lc_ctype);
setlocale(LC_CTYPE, locale_time);
#endif
--- 663,674 ----
elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
#ifdef WIN32
! /* See the WIN32 comment near the top of PGLC_localeconv() */
save_lc_ctype = setlocale(LC_CTYPE, NULL);
if (save_lc_ctype)
save_lc_ctype = pstrdup(save_lc_ctype);
+ else
+ save_lc_ctype = pstrdup("");
setlocale(LC_CTYPE, locale_time);
#endif
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers