On Wed, Jul 20, 2022 at 10:27 PM Juan José Santamaría Flecha <juanjo.santama...@gmail.com> wrote: > On Tue, Jul 19, 2022 at 4:47 AM Thomas Munro <thomas.mu...@gmail.com> wrote: >> As for whether "accordingly" still applies, by the logic of >> win32_langinfo()... Windows still considers WIN1252 to be the default >> ANSI code page for "en-US", though it'd work with UTF-8 too. I'm not >> sure what to make of that. The goal here was to give Windows users >> good defaults, but WIN1252 is probably not what most people actually >> want. Hmph. > > > Still, WIN1252 is not the wrong answer for what we are asking. Even if you > enable UTF-8 support [1], the system will use the current default Windows > ANSI code page (ACP) for the locale and UTF-8 for the code page.
I'm still confused about what that means. Suppose we decided to insist on UTF-8 by adding a ".UTF-8" suffix to the name, as that page says we can, now that we're on Windows 10+, when building the default locale name (see experimental 0002 patch, attached). It initially seemed to have the right effect: The database cluster will be initialized with locale "en-US.UTF-8". The default database encoding has accordingly been set to "UTF8". The default text search configuration will be set to "english". But then the Turkish i test in contrib/citext/sql/citext_utf8.sql failed[1]: SELECT 'i'::citext = 'İ'::citext AS t; t --- - t + f (1 row) About the pg_upgrade problem, maybe it's OK ... existing old-format names should continue to work, but we can still remove the weird code that does locale name tweaking, right? pg_upgraded databases should contain fixed names (i.e. names that were fixed by old initdb, so they should continue to work), and new clusters will get BCP 47 names. I don't really know; I was just playing with rough ideas by sending patches to CI here... [1] https://cirrus-ci.com/task/6423238052937728
From b007eb45e575956d5035f4152f72177abddc2762 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Tue, 19 Jul 2022 06:31:17 +1200 Subject: [PATCH v3 1/3] Default to BCP 47 locale in initdb on Windows. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid selecting traditional Windows locale names written with English words, because they are unstable and not recommended for use in databases. Since setlocale() returns such names, on Windows use GetUserDefaultLocaleName() if the user didn't provide an explicit locale. Also update the documentation to recommend BCP 47 over the traditional names when providing explicit values to initdb. Reviewed-by: Juan José Santamaría Flecha <juanjo.santama...@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKGJ%3DXThErgAQRoqfCy1bKPxXVuF0%3D2zDbB%2BSxDs59pv7Fw%40mail.gmail.com --- doc/src/sgml/charset.sgml | 10 ++++++++-- src/bin/initdb/initdb.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 445fd175d8..b656ca489f 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -83,8 +83,14 @@ initdb --locale=sv_SE system under what names depends on what was provided by the operating system vendor and what was installed. On most Unix systems, the command <literal>locale -a</literal> will provide a list of available locales. - Windows uses more verbose locale names, such as <literal>German_Germany</literal> - or <literal>Swedish_Sweden.1252</literal>, but the principles are the same. + </para> + + <para> + Windows uses BCP 47 language tags, like ICU. + For example, <literal>sv-SE</literal> represents Swedish as spoken in Sweden. + Windows also supports more verbose locale names based on English words, + such as <literal>German_Germany</literal> or <literal>Swedish_Sweden.1252</literal>, + but these are not recommended. 
</para> <para> diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 89b888eaa5..3af08b7b99 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -59,6 +59,10 @@ #include "sys/mman.h" #endif +#ifdef WIN32 +#include <winnls.h> +#endif + #include "access/xlog_internal.h" #include "catalog/pg_authid_d.h" #include "catalog/pg_class_d.h" /* pgrminclude ignore */ @@ -2007,6 +2011,7 @@ locale_date_order(const char *locale) static void check_locale_name(int category, const char *locale, char **canonname) { + char *locale_copy; char *save; char *res; @@ -2022,10 +2027,30 @@ check_locale_name(int category, const char *locale, char **canonname) /* for setlocale() call */ if (!locale) - locale = ""; + { +#ifdef WIN32 + wchar_t wide_name[LOCALE_NAME_MAX_LENGTH]; + char name[LOCALE_NAME_MAX_LENGTH]; + + /* use Windows API to find the default in BCP47 format */ + if (GetUserDefaultLocaleName(wide_name, LOCALE_NAME_MAX_LENGTH) == 0) + pg_fatal("failed to get default locale name: error code %lu", + GetLastError()); + if (WideCharToMultiByte(CP_ACP, 0, wide_name, -1, name, + LOCALE_NAME_MAX_LENGTH, NULL, NULL) == 0) + pg_fatal("failed to convert locale name: error code %lu", + GetLastError()); + locale_copy = pg_strdup(name); +#else + /* use environment to find the default */ + locale_copy = pg_strdup(""); +#endif + } + else + locale_copy = pg_strdup(locale); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); + res = setlocale(category, locale_copy); /* save canonical name if requested. */ if (res && canonname) @@ -2054,6 +2079,8 @@ check_locale_name(int category, const char *locale, char **canonname) pg_fatal("invalid locale settings; check LANG and LC_* environment variables"); } } + + free(locale_copy); } /* -- 2.30.2
From 430fda564b3a36f346a672668e5181f43132368e Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Wed, 20 Jul 2022 22:38:50 +1200 Subject: [PATCH v3 2/3] Default to UTF-8 in initdb on Windows. --- src/backend/utils/adt/pg_locale.c | 13 ++++++++++++- src/bin/initdb/initdb.c | 4 +++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 607a4b7340..6242dc094d 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1687,8 +1687,19 @@ get_collation_actual_version(char collprovider, const char *collcollate) */ NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)}; WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH]; + char copy_collcollate[LOCALE_NAME_MAX_LENGTH]; - MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate, + /* Trim off encoding, if there is one */ + strlcpy(copy_collcollate, collcollate, sizeof(copy_collcollate)); + for (char *p = copy_collcollate; *p; ++p) + { + if (*p == '.') + { + *p = 0; + break; + } + } + MultiByteToWideChar(CP_ACP, 0, copy_collcollate, -1, wide_collcollate, LOCALE_NAME_MAX_LENGTH); if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version)) { diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 3af08b7b99..1170b80d75 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2040,7 +2040,9 @@ check_locale_name(int category, const char *locale, char **canonname) LOCALE_NAME_MAX_LENGTH, NULL, NULL) == 0) pg_fatal("failed to convert locale name: error code %lu", GetLastError()); - locale_copy = pg_strdup(name); + /* default to UTF-8, no matter what the system default codepage is */ + locale_copy = pg_malloc(strlen(name) + sizeof(".UTF-8") + 1); + sprintf(locale_copy, "%s.UTF-8", name); #else /* use environment to find the default */ locale_copy = pg_strdup(""); -- 2.30.2
From 5281778fbcd20155fd4613732c00e140f7745842 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Tue, 19 Jul 2022 08:53:08 +1200 Subject: [PATCH v3 3/3] Remove support for old Windows locale names. We now use BCP 47 locale names by default and also advise those for explicit use. Remove support for munging the old unstable and unsystematic English word-style locale names. If you explicitly provide a traditional value like "English_United States.1252", it will still work, but we won't do any kind of cleanup on the name. This should be enough to work with pg_upgrade'd systems from before we started using BCP 47. XXX Is that true? --- configure | 6 - configure.ac | 1 - doc/src/sgml/charset.sgml | 2 +- src/backend/utils/adt/pg_locale.c | 231 +++--------------------------- src/include/port/win32_port.h | 9 -- src/port/win32setlocale.c | 193 ------------------------- src/tools/msvc/Mkvcbuild.pm | 2 +- 7 files changed, 19 insertions(+), 425 deletions(-) delete mode 100644 src/port/win32setlocale.c diff --git a/configure b/configure index 59fa82b8d7..d0da0c0a40 100755 --- a/configure +++ b/configure @@ -17180,12 +17180,6 @@ esac ;; esac - case " $LIBOBJS " in - *" win32setlocale.$ac_objext "* ) ;; - *) LIBOBJS="$LIBOBJS win32setlocale.$ac_objext" - ;; -esac - case " $LIBOBJS " in *" win32stat.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS win32stat.$ac_objext" diff --git a/configure.ac b/configure.ac index 612dabf698..c0906429be 100644 --- a/configure.ac +++ b/configure.ac @@ -1996,7 +1996,6 @@ if test "$PORTNAME" = "win32"; then AC_LIBOBJ(win32error) AC_LIBOBJ(win32ntdll) AC_LIBOBJ(win32security) - AC_LIBOBJ(win32setlocale) AC_LIBOBJ(win32stat) AC_DEFINE([HAVE_SYMLINK], 1, [Define to 1 if you have the `symlink' function.]) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index b656ca489f..3c12f3f344 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -90,7 +90,7 @@ initdb --locale=sv_SE For example, 
<literal>sv-SE</literal> represents Swedish as spoken in Sweden. Windows also supports more verbose locale names based on English words, such as <literal>German_Germany</literal> or <literal>Swedish_Sweden.1252</literal>, - but these are not recommended. + but these should not be used in PostgreSQL. </para> <para> diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 6242dc094d..f7f51a7055 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -118,7 +118,7 @@ static HTAB *collation_cache = NULL; #if defined(WIN32) && defined(LC_MESSAGES) -static char *IsoLocaleName(const char *); /* MSVC specific */ +static char *PosixLocaleName(const char *); #endif #ifdef USE_ICU @@ -204,10 +204,7 @@ pg_perm_setlocale(int category, const char *locale) case LC_MESSAGES: envvar = "LC_MESSAGES"; #ifdef WIN32 - result = IsoLocaleName(locale); - if (result == NULL) - result = (char *) locale; - elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result); + locale = PosixLocaleName(locale); #endif /* WIN32 */ break; #endif /* LC_MESSAGES */ @@ -905,218 +902,35 @@ cache_locale_time(void) #if defined(WIN32) && defined(LC_MESSAGES) /* - * Convert a Windows setlocale() argument to a Unix-style one. + * Convert a Windows BCP 47 locale name to a POSIX one. * * Regardless of platform, we install message catalogs under a Unix-style * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings * following that style will elicit localized interface strings. * - * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C" - * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>], - * case-insensitive. setlocale() returns the fully-qualified form; for - * example, setlocale("thaI") returns "Thai_Thailand.874". Internally, - * setlocale() and _create_locale() select a "locale identifier"[1] and store - * it in an undocumented _locale_t field. 
From that LCID, we can retrieve the - * ISO 639 language and the ISO 3166 country. Character encoding does not - * matter, because the server and client encodings govern that. - * - * Windows Vista introduced the "locale name" concept[2], closely following - * RFC 4646. Locale identifiers are now deprecated. Starting with Visual - * Studio 2012, setlocale() accepts locale names in addition to the strings it - * accepted historically. It does not standardize them; setlocale("Th-tH") - * returns "Th-tH". setlocale(category, "") still returns a traditional - * string. Furthermore, msvcr110.dll changed the undocumented _locale_t - * content to carry locale names instead of locale identifiers. - * - * Visual Studio 2015 should still be able to do the same as Visual Studio - * 2012, but the declaration of locale_name is missing in _locale_t, causing - * this code compilation to fail, hence this falls back instead on to - * enumerating all system locales by using EnumSystemLocalesEx to find the - * required locale name. If the input argument is in Unix-style then we can - * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as - * LOCALE_SNAME. - * - * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in - * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built - * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit - * localized messages. In particular, every lc_messages setting that initdb - * can select automatically will yield only C-locale messages. XXX This could - * be fixed by running the fully-qualified locale name through a lookup table. - * - * This function returns a pointer to a static buffer bearing the converted - * name or NULL if conversion fails. 
- * - * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers - * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names - */ - -/* - * Callback function for EnumSystemLocalesEx() in get_iso_localename(). - * - * This function enumerates all system locales, searching for one that matches - * an input with the format: <Language>[_<Country>], e.g. - * English[_United States] - * - * The input is a three wchar_t array as an LPARAM. The first element is the - * locale_name we want to match, the second element is an allocated buffer - * where the Unix-style locale is copied if a match is found, and the third - * element is the search status, 1 if a match was found, 0 otherwise. - */ -static BOOL CALLBACK -search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) -{ - wchar_t test_locale[LOCALE_NAME_MAX_LENGTH]; - wchar_t **argv; - - (void) (dwFlags); - - argv = (wchar_t **) lparam; - *argv[2] = (wchar_t) 0; - - memset(test_locale, 0, sizeof(test_locale)); - - /* Get the name of the <Language> in English */ - if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME, - test_locale, LOCALE_NAME_MAX_LENGTH)) - { - /* - * If the enumerated locale does not have a hyphen ("en") OR the - * lc_message input does not have an underscore ("English"), we only - * need to compare the <Language> tags. - */ - if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL) - { - if (_wcsicmp(argv[0], test_locale) == 0) - { - wcscpy(argv[1], pStr); - *argv[2] = (wchar_t) 1; - return FALSE; - } - } - - /* - * We have to compare a full <Language>_<Country> tag, so we append - * the underscore and name of the country/region in English, e.g. - * "English_United States". 
- */ - else - { - size_t len; - - wcscat(test_locale, L"_"); - len = wcslen(test_locale); - if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME, - test_locale + len, - LOCALE_NAME_MAX_LENGTH - len)) - { - if (_wcsicmp(argv[0], test_locale) == 0) - { - wcscpy(argv[1], pStr); - *argv[2] = (wchar_t) 1; - return FALSE; - } - } - } - } - - return TRUE; -} - -/* - * This function converts a Windows locale name to an ISO formatted version - * for Visual Studio 2015 or greater. - * - * Returns NULL, if no valid conversion was found. + * Historically, verbose, but unsystematic and unstable names like + * "Thai_Thailand.874" were supported, but now only BCP 47 input is expected. + * That means we just need to be able to convert "en-US" to "en_US". */ static char * -get_iso_localename(const char *winlocname) +PosixLocaleName(const char *winlocname) { - wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH]; - wchar_t buffer[LOCALE_NAME_MAX_LENGTH]; - static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; - char *period; - int len; - int ret_val; - - /* - * Valid locales have the following syntax: - * <Language>[_<Country>[.<CodePage>]] - * - * GetLocaleInfoEx can only take locale name without code-page and for the - * purpose of this API the code-page doesn't matter. - */ - period = strchr(winlocname, '.'); - if (period != NULL) - len = period - winlocname; - else - len = pg_mbstrlen(winlocname); - - memset(wc_locale_name, 0, sizeof(wc_locale_name)); - memset(buffer, 0, sizeof(buffer)); - MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name, - LOCALE_NAME_MAX_LENGTH); - - /* - * If the lc_messages is already a Unix-style string, we have a direct - * match with LOCALE_SNAME, e.g. en-US, en_US. - */ - ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer, - LOCALE_NAME_MAX_LENGTH); - if (!ret_val) - { - /* - * Search for a locale in the system that matches language and country - * name. 
- */ - wchar_t *argv[3]; - - argv[0] = wc_locale_name; - argv[1] = buffer; - argv[2] = (wchar_t *) &ret_val; - EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv, - NULL); - } - - if (ret_val) - { - size_t rc; - char *hyphen; - - /* Locale names use only ASCII, any conversion locale suffices. */ - rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL); - if (rc == -1 || rc == sizeof(iso_lc_messages)) - return NULL; - - /* - * Simply replace the hyphen with an underscore. See comments in - * IsoLocaleName. - */ - hyphen = strchr(iso_lc_messages, '-'); - if (hyphen) - *hyphen = '_'; - return iso_lc_messages; - } - - return NULL; -} - -static char * -IsoLocaleName(const char *winlocname) -{ -#if defined(_MSC_VER) - static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; + char iso_lc_messages[LOCALE_NAME_MAX_LENGTH]; + char *hyphen; if (pg_strcasecmp("c", winlocname) == 0 || pg_strcasecmp("posix", winlocname) == 0) { strcpy(iso_lc_messages, "C"); - return iso_lc_messages; } else - return get_iso_localename(winlocname); - -#endif /* defined(_MSC_VER) */ - return NULL; /* Not supported on this version of msvc/mingw */ + { + strlcpy(iso_lc_messages, winlocname, sizeof(iso_lc_messages)); + hypen = strchr(iso_lc_messages, '-'); + if (hyphen) + *hyphen = '_'; + } + return pg_strcpy(iso_lc_messages); } #endif /* WIN32 && LC_MESSAGES */ @@ -1680,11 +1494,6 @@ get_collation_actual_version(char collprovider, const char *collcollate) ereport(ERROR, (errmsg("could not load locale \"%s\"", collcollate))); #elif defined(WIN32) - /* - * If we are targeting Windows Vista and above, we can ask for a name - * given a collation name (earlier versions required a location code - * that we don't have). 
- */ NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)}; WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH]; char copy_collcollate[LOCALE_NAME_MAX_LENGTH]; @@ -1703,13 +1512,7 @@ get_collation_actual_version(char collprovider, const char *collcollate) LOCALE_NAME_MAX_LENGTH); if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version)) { - /* - * GetNLSVersionEx() wants a language tag such as "en-US", not a - * locale name like "English_United States.1252". Until those - * values can be prevented from entering the system, or 100% - * reliably converted to the more useful tag format, tolerate the - * resulting error and report that we have no version data. - */ + /* Old style locale names fail here, so ignore. */ if (GetLastError() == ERROR_INVALID_PARAMETER) return NULL; diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 4de5bf3bf6..a569433811 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -445,15 +445,6 @@ extern int _pgstat64(const char *name, struct stat *buf); #undef setlocale #endif -/* - * Define our own wrapper macro around setlocale() to work around bugs in - * Windows' native setlocale() function. - */ -extern char *pgwin32_setlocale(int category, const char *locale); - -#define setlocale(a,b) pgwin32_setlocale(a,b) - - /* In backend/port/win32/signal.c */ extern PGDLLIMPORT volatile int pg_signal_queue; extern PGDLLIMPORT int pg_signal_mask; diff --git a/src/port/win32setlocale.c b/src/port/win32setlocale.c deleted file mode 100644 index aadd09a4e9..0000000000 --- a/src/port/win32setlocale.c +++ /dev/null @@ -1,193 +0,0 @@ -/*------------------------------------------------------------------------- - * - * win32setlocale.c - * Wrapper to work around bugs in Windows setlocale() implementation - * - * Copyright (c) 2011-2022, PostgreSQL Global Development Group - * - * IDENTIFICATION - * src/port/win32setlocale.c - * - * - * The setlocale() function in Windows is broken in two ways. 
First, it - * has a problem with locale names that have a dot in the country name. For - * example: - * - * "Chinese (Traditional)_Hong Kong S.A.R..950" - * - * For some reason, setlocale() doesn't accept that as argument, even though - * setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts - * various alternative names for such countries, so to work around the broken - * setlocale() function, we map the troublemaking locale names to accepted - * aliases, before calling setlocale(). - * - * The second problem is that the locale name for "Norwegian (Bokmål)" - * contains a non-ASCII character. That's problematic, because it's not clear - * what encoding the locale name itself is supposed to be in, when you - * haven't yet set a locale. Also, it causes problems when the cluster - * contains databases with different encodings, as the locale name is stored - * in the pg_database system catalog. To work around that, when setlocale() - * returns that locale name, map it to a pure-ASCII alias for the same - * locale. - *------------------------------------------------------------------------- - */ - -#include "c.h" - -#undef setlocale - -struct locale_map -{ - /* - * String in locale name to replace. Can be a single string (end is NULL), - * or separate start and end strings. If two strings are given, the locale - * name must contain both of them, and everything between them is - * replaced. This is used for a poor-man's regexp search, allowing - * replacement of "start.*end". - */ - const char *locale_name_start; - const char *locale_name_end; - - const char *replacement; /* string to replace the match with */ -}; - -/* - * Mappings applied before calling setlocale(), to the argument. - */ -static const struct locale_map locale_map_argument[] = { - /* - * "HKG" is listed here: - * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx - * (Country/Region Strings). - * - * "ARE" is the ISO-3166 three-letter code for U.A.E. 
It is not on the - * above list, but seems to work anyway. - */ - {"Hong Kong S.A.R.", NULL, "HKG"}, - {"U.A.E.", NULL, "ARE"}, - - /* - * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't - * seem to recognize that. And Macau isn't listed in the table of accepted - * abbreviations linked above. Fortunately, "ZHM" seems to be accepted as - * an alias for "Chinese (Traditional)_Macau S.A.R..950". I'm not sure - * where "ZHM" comes from, must be some legacy naming scheme. But hey, it - * works. - * - * Note that unlike HKG and ARE, ZHM is an alias for the *whole* locale - * name, not just the country part. - * - * Some versions of Windows spell it "Macau", others "Macao". - */ - {"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"}, - {"Chinese_Macau S.A.R..950", NULL, "ZHM"}, - {"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"}, - {"Chinese_Macao S.A.R..950", NULL, "ZHM"}, - {NULL, NULL, NULL} -}; - -/* - * Mappings applied after calling setlocale(), to its return value. - */ -static const struct locale_map locale_map_result[] = { - /* - * "Norwegian (Bokmål)" locale name contains the a-ring character. - * Map it to a pure-ASCII alias. - * - * It's not clear what encoding setlocale() uses when it returns the - * locale name, so to play it safe, we search for "Norwegian (Bok*l)". - * - * Just to make life even more complicated, some versions of Windows spell - * the locale name without parentheses. Translate that too. - */ - {"Norwegian (Bokm", "l)_Norway", "Norwegian_Norway"}, - {"Norwegian Bokm", "l_Norway", "Norwegian_Norway"}, - {NULL, NULL, NULL} -}; - -#define MAX_LOCALE_NAME_LEN 100 - -static const char * -map_locale(const struct locale_map *map, const char *locale) -{ - static char aliasbuf[MAX_LOCALE_NAME_LEN]; - int i; - - /* Check if the locale name matches any of the problematic ones. 
*/ - for (i = 0; map[i].locale_name_start != NULL; i++) - { - const char *needle_start = map[i].locale_name_start; - const char *needle_end = map[i].locale_name_end; - const char *replacement = map[i].replacement; - char *match; - char *match_start = NULL; - char *match_end = NULL; - - match = strstr(locale, needle_start); - if (match) - { - /* - * Found a match for the first part. If this was a two-part - * replacement, find the second part. - */ - match_start = match; - if (needle_end) - { - match = strstr(match_start + strlen(needle_start), needle_end); - if (match) - match_end = match + strlen(needle_end); - else - match_start = NULL; - } - else - match_end = match_start + strlen(needle_start); - } - - if (match_start) - { - /* Found a match. Replace the matched string. */ - int matchpos = match_start - locale; - int replacementlen = strlen(replacement); - char *rest = match_end; - int restlen = strlen(rest); - - /* check that the result fits in the static buffer */ - if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN) - return NULL; - - memcpy(&aliasbuf[0], &locale[0], matchpos); - memcpy(&aliasbuf[matchpos], replacement, replacementlen); - /* includes null terminator */ - memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1); - - return aliasbuf; - } - } - - /* no match, just return the original string */ - return locale; -} - -char * -pgwin32_setlocale(int category, const char *locale) -{ - const char *argument; - char *result; - - if (locale == NULL) - argument = NULL; - else - argument = map_locale(locale_map_argument, locale); - - /* Call the real setlocale() function */ - result = setlocale(category, argument); - - /* - * setlocale() is specified to return a "char *" that the caller is - * forbidden to modify, so casting away the "const" is innocuous. 
- */ - if (result) - result = unconstify(char *, map_locale(locale_map_result, result)); - - return result; -} diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index cc7a908d10..fff378d2b3 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -109,7 +109,7 @@ sub mkvcbuild pqsignal.c mkdtemp.c qsort.c qsort_arg.c bsearch_arg.c quotes.c system.c strerror.c tar.c win32env.c win32error.c win32ntdll.c - win32security.c win32setlocale.c win32stat.c); + win32security.c win32stat.c); push(@pgportfiles, 'strtof.c') if ($vsVersion < '14.00'); -- 2.30.2