In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/a6ba75c35c57de2634ae9428fe047ee2bfc9f4f3?hp=af5735030ecb884df4c76ca1466de7bdd777546e>
- Log ----------------------------------------------------------------- commit a6ba75c35c57de2634ae9428fe047ee2bfc9f4f3 Merge: af5735030e 8e13243a89 Author: Karl Williamson <[email protected]> Date: Mon Mar 4 13:03:02 2019 -0700 Merge branch 'crippled_locales' into blead This adds Configure probes to find platforms that have crippled locale implementations, in which they pretend that they allow for locales, but really don't. Then use these probe results to inform our locale handling; otherwise our locale tests on such systems would fail. commit 8e13243a89cbac8e8110cc8ac0674183cfa30bf7 Author: Karl Williamson <[email protected]> Date: Mon Mar 4 12:27:59 2019 -0700 Properly handle systems with crippled locales Some systems fake their locales, so that they pretend to accept a locale change, but they either do nothing, making everything the C locale, or on some systems there is a second locale "C.UTF-8" that can be switched to. Configure probes have been added to find such systems, and this commit changes to use the results of these probes, so that we don't try looking for other locales (any names we came up with would be accepted as valid, but don't work, and tests were failing as a result). Anything running the musl library fits, as does OpenBSD and its kin, as they view locales as security risks. This commit allows us to take out some code that was looking for particular OS's. commit 201f75a992613afb464a3a412b8a271c8726ecb3 Author: Karl Williamson <[email protected]> Date: Mon Mar 4 11:56:05 2019 -0700 t/loc_tools.pl: Only try C.UTF-8 if Configure says avail I added a Configure probe for this capability, since some platforms will say yes to any locale name, so we can't really test for it in perl. commit 5b64f24c7b79ab856726455c6cee2237c97dd6dd Author: Karl Williamson <[email protected]> Date: Sat Feb 16 22:01:44 2019 -0700 locale.c: Tighten turkish locale tests on C99 platforms C99 has wide character case changing. 
If those are available, use them to be surer we have a Turkic locale. commit 2e3ef32f4acc106de6ae8170099f09a36d4ef490 Author: Karl Williamson <[email protected]> Date: Sat Feb 16 22:12:41 2019 -0700 Improve setlocale() detection in Configure-ish files This also now notes some behavior of setlocale commit 0806cdda2789ca6394976d1ff3e65dd59bcb8d1b Author: Karl Williamson <[email protected]> Date: Sat Feb 16 21:55:12 2019 -0700 Add towupper() and towlower() to Configure-ish files commit 92271d410d92433b97015908e12c9f262c5953e7 Author: Karl Williamson <[email protected]> Date: Sat Feb 16 21:31:29 2019 -0700 Add wctype.h to Configure-ish files ----------------------------------------------------------------------- Summary of changes: Configure | 199 ++++++++++++++++++++++++++++++++++++++++- Cross/config.sh-arm-linux | 4 + Cross/config.sh-arm-linux-n770 | 4 + NetWare/config.wc | 4 + Porting/config.sh | 4 + config_h.SH | 34 +++++-- configure.com | 5 ++ lib/locale.t | 2 +- locale.c | 19 +++- metaconfig.h | 4 + plan9/config_sh.sample | 4 + symbian/config.sh | 4 + t/loc_tools.pl | 33 +++---- t/run/locale.t | 18 ++-- uconfig.h | 38 ++++++-- uconfig.sh | 4 + uconfig64.sh | 4 + win32/config.ce | 4 + win32/config.gc | 4 + win32/config.vc | 4 + 20 files changed, 350 insertions(+), 46 deletions(-) diff --git a/Configure b/Configure index b91d1565b1..0c525c22f2 100755 --- a/Configure +++ b/Configure @@ -762,7 +762,9 @@ d_sethostent_r='' sethostent_r_proto='' d_setitimer='' d_setlinebuf='' +d_has_C_UTF8='' d_setlocale='' +d_setlocale_accepts_any_locale_name='' d_setlocale_r='' setlocale_r_proto='' d_setnent='' @@ -904,6 +906,8 @@ clocktype='' d_times='' d_tmpnam_r='' tmpnam_r_proto='' +d_towlower='' +d_towupper='' d_trunc='' d_truncate='' d_truncl='' @@ -1072,6 +1076,7 @@ i_ustat='' i_utime='' i_vfork='' i_wchar='' +i_wctype='' d_inc_version_list='' inc_version_list='' inc_version_list_init='' @@ -17784,14 +17789,191 @@ eval $inlibc set setlinebuf d_setlinebuf eval $inlibc -: see if 
setlocale exists -set setlocale d_setlocale -eval $inlibc - : see if locale.h is available set locale.h i_locale eval $inhdr +: see if this system has wctype.h +set wctype.h i_wctype +eval $inhdr + +: see if towupper exists +set towupper d_towupper +eval $inlibc + +: check for setlocale function and behavior +$cat <<EOM + +Checking to see if you have setlocale() and its behavior +EOM +$cat >try.c <<EOCP +#$i_stdlib I_STDLIB +#ifdef I_STDLIB +# include <stdlib.h> +#endif +#include <string.h> +#$i_locale I_LOCALE +#ifdef I_LOCALE +# include <locale.h> +#endif +#$i_wctype I_WCTYPE +#ifdef I_WCTYPE +# include <wctype.h> +#endif + +int main() { + const char * invalid_name = "\a"; /* This is really invalid! */ + int accepts_any_locale_name = 0; + int has_C_UTF8 = 0; + unsigned char bad_setlocale = 255; + + /* If LC_CTYPE isn't defined the compilation will fail, and locales will be + * disabled. It's hard to imagine an instance where meaningful locale + * handling could be done without LC_CTYPE */ + const char * name = setlocale(LC_CTYPE, "C"); + + if (name == NULL || strcmp(name, "C") != 0) { + exit(bad_setlocale); + } + + name = setlocale(LC_CTYPE, invalid_name); + if (name != NULL) { + + /* Let it pass if it accepts the name but gives back one of the C + * locales */ + if (strcmp(name, "C") != 0 && strcmp(name, "C.UTF-8") != 0) { + accepts_any_locale_name = 1; + } + } + + name = setlocale(LC_CTYPE, "C.UTF-8"); + if (name != NULL) { + unsigned char y_with_diaeresis = ('A' == 193) ? 0xDF : 0xFF; + +#$d_towupper HAS_TOWUPPER +#ifdef HAS_TOWUPPER + + /* We assume that if the machine doesn't have the C99 towupper, it + * doesn't have C.UTF-8, even if we successfully changed locales to + * include it. 
This seems safer even on platforms that didn't accept + * the really invalid name */ + + if (towupper(y_with_diaeresis) == 0x178) { + has_C_UTF8 = 1; + } + +#endif + + } + +#if 0 + + /* Currently unused code to determine if LC_ALL with disparate values uses + * category=value pairs or positional, and to determine the separator + * between the categories. We could add code so that if the separator were + * > '9', we subtract 10; similarly for 'Z' and 'z', and then just about + * every possible ASCII separator would fit in the 5 bits available in the + * exit code. This would not be true in EBCDIC. And then if LC_ALL is + * positional, we probably would want to know the order of the categories. + * Using a file between the C program and the shell script would really be + * require to do that */ +#ifdef LC_ALL + + unsigned char min_separator = ' ' - 1; + unsigned char separator = min_separator; + int uses_name_value_pair_names = 0; + + name = setlocale(LC_ALL, "C"); + if (name == NULL || strcmp(name, "C") != 0) { + exit(bad_setlocale); + } + + if (has_C_UTF8) { + char * pos; + + name = setlocale(LC_CTYPE, "C.UTF-8"); + if (name == NULL) { + exit(bad_setlocale); + } + name = setlocale(LC_ALL, NULL); + if (name == NULL) { + exit(bad_setlocale); + } + + pos = strstr(name, "LC_CTYPE=C.UTF-8"); + if (pos != NULL) { + uses_name_value_pair_names = 1; + if (pos == name) { + separator = name[sizeof("LC_CTYPE=C.UTF-8") - 1]; + } + else { + separator = *(pos - 1); + } + } + else { + pos = strstr(name, "C.UTF-8"); + if (pos == NULL) { + /* bad */ + } + else if (pos == name) { + separator = name[sizeof("C.UTF-8") - 1]; + } + else { + separator = *(pos - 1); + } + } + } + +#endif +#endif + + exit( 0 /* (separator - min_separator) << 3 + | uses_name_value_pair_names << 2 + */ + | has_C_UTF8 << 1 + | accepts_any_locale_name); + +} +EOCP +set try +if eval $compile; then + echo "Your system has setlocale()..." >&4 + $run ./try + case $? 
in + 0) echo "and it seems sane" >&4 + d_setlocale="$define" + d_setlocale_accepts_any_locale_name="$undef" + d_has_C_UTF8="false" + ;; + 1) echo "and it seems sane, but accepts any locale name as valid" >&4 + d_setlocale="$define" + d_setlocale_accepts_any_locale_name="$define" + d_has_C_UTF8="false" + ;; + 2) echo "and it seems sane" >&4 + d_setlocale="$define" + d_setlocale_accepts_any_locale_name="$undef" + d_has_C_UTF8="true" + ;; + 3) echo "and it seems sane, but accepts any locale name as valid" >&4 + d_setlocale="$define" + d_setlocale_accepts_any_locale_name="$define" + d_has_C_UTF8="true" + ;; + *) echo "but it doesn't seem to work, so we won't use it." >&4 + d_setlocale="$undef" + d_setlocale_accepts_any_locale_name="$undef" + d_has_C_UTF8="false" + ;; + esac +else + echo "your system does not have setlocale()" >&4 + d_setlocale="$undef" + d_setlocale_accepts_any_locale_name="$undef" + d_has_C_UTF8="false" +fi +$rm_try + : see if setlocale_r exists set setlocale_r d_setlocale_r eval $inlibc @@ -19347,6 +19529,10 @@ case "$d_tmpnam_r" in ;; esac +: see if towlower exists +set towlower d_towlower +eval $inlibc + : see if trunc exists set trunc d_trunc eval $inlibc @@ -24040,6 +24226,7 @@ d_gmtime64='$d_gmtime64' d_gmtime_r='$d_gmtime_r' d_gnulibc='$d_gnulibc' d_grpasswd='$d_grpasswd' +d_has_C_UTF8='$d_has_C_UTF8' d_hasmntopt='$d_hasmntopt' d_htonl='$d_htonl' d_hypot='$d_hypot' @@ -24227,6 +24414,7 @@ d_sethostent_r='$d_sethostent_r' d_setitimer='$d_setitimer' d_setlinebuf='$d_setlinebuf' d_setlocale='$d_setlocale' +d_setlocale_accepts_any_locale_name='$d_setlocale_accepts_any_locale_name' d_setlocale_r='$d_setlocale_r' d_setnent='$d_setnent' d_setnetent_r='$d_setnetent_r' @@ -24334,6 +24522,8 @@ d_times='$d_times' d_tm_tm_gmtoff='$d_tm_tm_gmtoff' d_tm_tm_zone='$d_tm_tm_zone' d_tmpnam_r='$d_tmpnam_r' +d_towlower='$d_towlower' +d_towupper='$d_towupper' d_trunc='$d_trunc' d_truncate='$d_truncate' d_truncl='$d_truncl' @@ -24553,6 +24743,7 @@ i_ustat='$i_ustat' 
i_utime='$i_utime' i_vfork='$i_vfork' i_wchar='$i_wchar' +i_wctype='$i_wctype' i_xlocale='$i_xlocale' ignore_versioned_solibs='$ignore_versioned_solibs' inc_version_list='$inc_version_list' diff --git a/Cross/config.sh-arm-linux b/Cross/config.sh-arm-linux index 17d998f1ec..13a1427381 100644 --- a/Cross/config.sh-arm-linux +++ b/Cross/config.sh-arm-linux @@ -492,6 +492,7 @@ d_sethostent_r='undef' d_setitimer='define' d_setlinebuf='define' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='define' d_setnetent_r='undef' @@ -598,6 +599,8 @@ d_times='define' d_tm_tm_gmtoff='define' d_tm_tm_zone='define' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='define' d_truncl='define' @@ -811,6 +814,7 @@ i_ustat='define' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='y' inc_version_list=' ' diff --git a/Cross/config.sh-arm-linux-n770 b/Cross/config.sh-arm-linux-n770 index a82318c924..d1e1c30070 100644 --- a/Cross/config.sh-arm-linux-n770 +++ b/Cross/config.sh-arm-linux-n770 @@ -491,6 +491,7 @@ d_sethostent_r='undef' d_setitimer='define' d_setlinebuf='define' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='define' d_setnetent_r='undef' @@ -597,6 +598,8 @@ d_times='define' d_tm_tm_gmtoff='define' d_tm_tm_zone='define' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='define' d_truncl='define' @@ -809,6 +812,7 @@ i_ustat='define' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='y' inc_version_list=' ' diff --git a/NetWare/config.wc b/NetWare/config.wc index e0e880d7de..9173c9de0f 100644 --- a/NetWare/config.wc +++ b/NetWare/config.wc @@ -481,6 +481,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' 
d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -588,6 +589,8 @@ d_times='undef' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -793,6 +796,7 @@ i_ustat='undef' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='' inc_version_list='' diff --git a/Porting/config.sh b/Porting/config.sh index 946b0f1e6b..2a0532df52 100644 --- a/Porting/config.sh +++ b/Porting/config.sh @@ -507,6 +507,7 @@ d_sethostent_r='undef' d_setitimer='define' d_setlinebuf='define' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='define' d_setnetent_r='undef' @@ -614,6 +615,8 @@ d_times='define' d_tm_tm_gmtoff='define' d_tm_tm_zone='define' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='define' d_truncate='define' d_truncl='define' @@ -833,6 +836,7 @@ i_ustat='define' i_utime='define' i_vfork='undef' i_wchar='define' +i_wctype='undef' i_xlocale='define' ignore_versioned_solibs='y' inc_version_list='' diff --git a/config_h.SH b/config_h.SH index 388c1fe050..f26ed2f3bb 100755 --- a/config_h.SH +++ b/config_h.SH @@ -439,12 +439,6 @@ sed <<!GROK!THIS! >$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_setlinebuf HAS_SETLINEBUF /**/ -/* HAS_SETLOCALE: - * This symbol, if defined, indicates that the setlocale routine is - * available to handle locale-specific ctype implementations. - */ -#$d_setlocale HAS_SETLOCALE /**/ - /* HAS_SETPGID: * This symbol, if defined, indicates that the setpgid(pid, gpid) * routine is available to set process group ID. @@ -3266,6 +3260,17 @@ sed <<!GROK!THIS! >$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' 
-e 's!^#un */ #$d_setitimer HAS_SETITIMER /**/ +/* HAS_SETLOCALE: + * This symbol, if defined, indicates that the setlocale routine is + * available to handle locale-specific ctype implementations. + */ +/* SETLOCALE_ACCEPTS_ANY_LOCALE_NAME: + * This symbol, if defined, indicates that the setlocale routine is + * available and it accepts any input locale name as valid. + */ +#$d_setlocale HAS_SETLOCALE /**/ +#$d_setlocale_accepts_any_locale_name SETLOCALE_ACCEPTS_ANY_LOCALE_NAME /**/ + /* HAS_SETPROCTITLE: * This symbol, if defined, indicates that the setproctitle routine is * available to set process title. @@ -3494,6 +3499,18 @@ sed <<!GROK!THIS! >$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$d_timegm HAS_TIMEGM /**/ +/* HAS_TOWLOWER: + * This symbol, if defined, indicates that the towlower () routine is + * available to do string concatenation. + */ +#$d_towlower HAS_TOWLOWER /**/ + +/* HAS_TOWUPPER: + * This symbol, if defined, indicates that the towupper () routine is + * available to do string concatenation. + */ +#$d_towupper HAS_TOWUPPER /**/ + /* HAS_TRUNC: * This symbol, if defined, indicates that the trunc routine is * available to round doubles towards zero. @@ -3796,6 +3813,11 @@ sed <<!GROK!THIS! >$CONFIG_H -e 's!^#undef\(.*/\)\*!/\*#define\1 \*!' -e 's!^#un */ #$i_wchar I_WCHAR /**/ +/* I_WCTYPE: + * This symbol, if defined, indicates that <wctype.h> exists. + */ +#$i_wctype I_WCTYPE /**/ + /* DOUBLEINFBYTES: * This symbol, if defined, is a comma-separated list of * hexadecimal bytes for the double precision infinity. 
diff --git a/configure.com b/configure.com index 70a80455e6..1f3e73c681 100644 --- a/configure.com +++ b/configure.com @@ -5438,6 +5438,7 @@ $ d_mbtowc="define" $ d_mktime="define" $ d_nl_langinfo="define" $ d_setlocale="define" +$ d_setlocale_accepts_any_locale_name="undef" $ d_stdiobase="define" $ d_stdio_cnt_lval="define" $ d_stdio_ptr_lval="define" @@ -6312,6 +6313,7 @@ $ WC "d_sethent='" + d_sethent + "'" $ WC "d_setitimer='" + d_setitimer + "'" $ WC "d_setlinebuf='undef'" $ WC "d_setlocale='" + d_setlocale + "'" +$ WC "d_setlocale_accepts_any_locale_name='" + d_setlocale_accepts_any_locale_name + "'" $ WC "d_setnent='" + d_setnent + "'" $ WC "d_setpent='" + d_setpent + "'" $ WC "d_setpgid='" + d_setpgid + "'" @@ -6600,6 +6602,7 @@ $ WC "i_ustat='undef'" $ WC "i_utime='" + i_utime + "'" $ WC "i_vfork='undef'" $ WC "i_wchar='define'" +$ WC "i_wctype='define'" $ WC "i_xlocale='undef'" $ WC "inc_version_list='0'" $ WC "inc_version_list_init='0'" @@ -6934,6 +6937,8 @@ $ WC "d_srandom_r='undef'" $ WC "d_strerror_l='undef'" $ WC "d_strerror_r='undef'" $ WC "d_tmpnam_r='undef'" +$ WC "d_towlower='define'" +$ WC "d_towupper='define'" $ WC "d_ttyname_r='" + d_ttyname_r + "'" $ WC "d_uselocale='undef'" $ WC "ctermid_r_proto='0'" diff --git a/lib/locale.t b/lib/locale.t index 0f43dff6bd..4c324eab1c 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -2543,7 +2543,7 @@ foreach $test_num ($first_locales_test_number..$final_locales_test_number) { $test_num = $final_locales_test_number; -unless ( $os =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) { +if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) { # perl #115808 use warnings; my $warned = 0; diff --git a/locale.c b/locale.c index 3a2e49d72d..2b123d16ee 100644 --- a/locale.c +++ b/locale.c @@ -53,6 +53,9 @@ #ifdef I_WCHAR # include <wchar.h> #endif +#ifdef I_WCTYPE +# include <wctype.h> +#endif /* If the environment says to, we can output debugging information during * initialization. 
This is done before option parsing, and before any thread @@ -586,8 +589,11 @@ S_emulate_setlocale(const int category, /* If this assert fails, adjust the size of curlocales in intrpvar.h */ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); -# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) - +# if defined(_NL_LOCALE_NAME) \ + && defined(DEBUGGING) \ + && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME) + /* On systems that accept any locale name, the real underlying locale + * is often returned by this internal function, so we can't use it */ { /* Internal glibc for querylocale(), but doesn't handle * empty-string ("") locale properly; who knows what other @@ -1528,7 +1534,16 @@ S_new_ctype(pTHX_ const char *newctype) /* UTF-8 locales can have special handling for 'I' and 'i' if they are * Turkic. Make sure these two are the only anomalies. (We don't use * towupper and towlower because they aren't in C89.) */ + +#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) + + if (towupper('i') == 0x130 && towlower('I') == 0x131) { + +#else + if (toupper('i') == 'i' && tolower('I') == 'I') { + +#endif check_for_problems = TRUE; maybe_utf8_turkic = TRUE; } diff --git a/metaconfig.h b/metaconfig.h index 2f9ce80712..73e0ab0257 100644 --- a/metaconfig.h +++ b/metaconfig.h @@ -23,4 +23,8 @@ * HAS_STRTOD_L * HAS_STRTOLD_L * I_WCHAR + * I_WCTYPE + * HAS_TOWLOWER + * HAS_TOWUPPER + * SETLOCALE_ACCEPTS_ANY_LOCALE_NAME */ diff --git a/plan9/config_sh.sample b/plan9/config_sh.sample index d9150cc0fa..78bc1daf03 100644 --- a/plan9/config_sh.sample +++ b/plan9/config_sh.sample @@ -492,6 +492,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='define' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -598,6 +599,8 @@ d_times='define' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' 
d_truncl='undef' @@ -805,6 +808,7 @@ i_ustat='undef' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='' inc_version_list=' ' diff --git a/symbian/config.sh b/symbian/config.sh index 7810dc153d..8b3122b286 100644 --- a/symbian/config.sh +++ b/symbian/config.sh @@ -439,6 +439,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='undef' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -547,6 +548,8 @@ d_times='define' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -736,6 +739,7 @@ i_varargs='undef' i_varhdr='stdarg.h' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='y' inc_version_list='' diff --git a/t/loc_tools.pl b/t/loc_tools.pl index 4b80ed5360..310eb2cabf 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -109,21 +109,14 @@ sub _trylocale ($$$$) { # For use only by other functions in this file! # systems return if $locale =~ / ^ pig $ /ix; - # As of 6.3, this platform's locale handling is basically broken. khw - # filed a bug report (no ticket number was returned), and it is supposedly - # going to change in a future release, so the statements here below sunset - # for any larger version, at which point this may start failing and have - # to be revisited. - # - # Given a legal individual category, basically whatever you set the locale - # to, the return from setlocale() indicates that it has taken effect, even - # if it hasn't. However, the return from querying LC_ALL won't reflect - # this. - if ($Config{osname} =~ /openbsd/i && $locale !~ / ^ (?: C | POSIX ) $/ix) { - my ($major, $minor) = $Config{osvers} =~ / ^ ( \d+ ) \. ( \d+ ) /ax; - return if ! defined $major || ! 
defined $minor - || $major < 6 || ($major == 6 && $minor <= 3); - } + # Certain platforms have a crippled locale system in which setlocale + # returns success for just about any possible locale name, but if anything + # actually happens as a result of the call, it is that the underlying + # locale is set to a system default, likely C or C.UTF-8. We can't test + # such systems fully, but we shouldn't disable the user from using + # locales, as it may work out for them (or not). + return if defined $Config{d_setlocale_accepts_any_locale_name} + && $locale !~ / ^ (?: C | POSIX | C\.UTF-8 ) $/ix; $categories = [ $categories ] unless ref $categories; @@ -337,7 +330,15 @@ sub find_locales ($;$) { my @Locale; _trylocale("C", $categories, \@Locale, $allow_incompatible); _trylocale("POSIX", $categories, \@Locale, $allow_incompatible); - _trylocale("C.UTF-8", $categories, \@Locale, $allow_incompatible); + + if ($Config{d_has_C_UTF8} eq 'true') { + _trylocale("C.UTF-8", $categories, \@Locale, $allow_incompatible); + } + + # There's no point in looking at anything more if we know that setlocale + # will return success on any garbage or non-garbage name. + return sort @Locale if defined $Config{d_setlocale_accepts_any_locale_name}; + foreach (1..16) { _trylocale("ISO8859-$_", $categories, \@Locale, $allow_incompatible); _trylocale("iso8859$_", $categories, \@Locale, $allow_incompatible); diff --git a/t/run/locale.t b/t/run/locale.t index 0296c9af67..78cfc2ff72 100644 --- a/t/run/locale.t +++ b/t/run/locale.t @@ -68,7 +68,7 @@ EOF my $non_C_locale; foreach my $locale (@locales) { - next if $locale eq "C" || $locale eq 'POSIX'; + next if $locale eq "C" || $locale eq 'POSIX' || $locale eq "C.UTF-8"; $non_C_locale = $locale; last; } @@ -460,15 +460,15 @@ EOF } SKIP: { - # Note: a Configure probe could be written to give us the syntax to - # use, but khw doesn't think it's worth it. 
If the POSIX 2008 locale - # functions are being used, the syntax becomes mostly irrelevant, so - # do the test anyway if they are - # it's a lot of trouble to figure out in a perl script - if ($^O eq 'openbsd' && ( $Config{useithreads} ne 'define' - || ! ${^SAFE_LOCALES})) + # Note: the setlocale Configure probe could be enhanced to give us the + # syntax to use, but khw doesn't think it's worth it at this time, as + # the current outliers seem to be skipped by the test just below + # anyway. If the POSIX 2008 locale functions are being used, the + # syntax becomes mostly irrelevant, so do the test anyway if they are. + # It's a lot of trouble to figure out in a perl script. + if ($Config{d_setlocale_accepts_any_locale_name} eq 'true') { - skip("The setlocale() syntax used is invalid on this platform", 2); + skip("Can't distinguish between valid and invalid locale names on this system", 2); } my @valid_categories = valid_locale_categories(); diff --git a/uconfig.h b/uconfig.h index 37fed70fee..92dba8709c 100644 --- a/uconfig.h +++ b/uconfig.h @@ -404,12 +404,6 @@ */ /*#define HAS_SETLINEBUF / **/ -/* HAS_SETLOCALE: - * This symbol, if defined, indicates that the setlocale routine is - * available to handle locale-specific ctype implementations. - */ -/*#define HAS_SETLOCALE / **/ - /* HAS_SETPGID: * This symbol, if defined, indicates that the setpgid(pid, gpid) * routine is available to set process group ID. @@ -3231,6 +3225,17 @@ */ /*#define HAS_SETITIMER / **/ +/* HAS_SETLOCALE: + * This symbol, if defined, indicates that the setlocale routine is + * available to handle locale-specific ctype implementations. + */ +/* SETLOCALE_ACCEPTS_ANY_LOCALE_NAME: + * This symbol, if defined, indicates that the setlocale routine is + * available and it accepts any input locale name as valid. 
+ */ +/*#define HAS_SETLOCALE / **/ +/*#define SETLOCALE_ACCEPTS_ANY_LOCALE_NAME / **/ + /* HAS_SETPROCTITLE: * This symbol, if defined, indicates that the setproctitle routine is * available to set process title. @@ -3459,6 +3464,18 @@ */ /*#define HAS_TIMEGM / **/ +/* HAS_TOWLOWER: + * This symbol, if defined, indicates that the towlower () routine is + * available to do string concatenation. + */ +/*#define HAS_TOWLOWER / **/ + +/* HAS_TOWUPPER: + * This symbol, if defined, indicates that the towupper () routine is + * available to do string concatenation. + */ +/*#define HAS_TOWUPPER / **/ + /* HAS_TRUNC: * This symbol, if defined, indicates that the trunc routine is * available to round doubles towards zero. @@ -3761,6 +3778,11 @@ */ /*#define I_WCHAR / **/ +/* I_WCTYPE: + * This symbol, if defined, indicates that <wctype.h> exists. + */ +/*#define I_WCTYPE / **/ + /* DOUBLEINFBYTES: * This symbol, if defined, is a comma-separated list of * hexadecimal bytes for the double precision infinity. 
@@ -5219,6 +5241,6 @@ #endif /* Generated from: - * 875df2221ab768da0a87740ad45e12bbc9e3b483bfa1a913727e982350ad74cf config_h.SH - * 24ca29f990de08f8119ad2c15f060361b86fc407caab088289d7d5ac3e0f117a uconfig.sh + * 2cc5c49f1266887de8224d2804aae6f80c43da0a62782d74866d5e828194698d config_h.SH + * d6d2fc8bf209b3d6db681ac0994a29a281afe35c3a96bb62079cb15b0605d035 uconfig.sh * ex: set ro: */ diff --git a/uconfig.sh b/uconfig.sh index e87725502c..f55def4255 100644 --- a/uconfig.sh +++ b/uconfig.sh @@ -432,6 +432,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='undef' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -538,6 +539,8 @@ d_times='undef' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -719,6 +722,7 @@ i_ustat='undef' i_utime='undef' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='y' inc_version_list_init='NULL' diff --git a/uconfig64.sh b/uconfig64.sh index 117fa87055..bfc6049b74 100644 --- a/uconfig64.sh +++ b/uconfig64.sh @@ -432,6 +432,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='undef' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -538,6 +539,8 @@ d_times='undef' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -719,6 +722,7 @@ i_ustat='undef' i_utime='undef' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='y' inc_version_list_init='NULL' diff --git a/win32/config.ce b/win32/config.ce index 9ab63a4da5..d8eb67d17b 100644 --- a/win32/config.ce +++ b/win32/config.ce @@ -479,6 +479,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='define' 
+d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -586,6 +587,8 @@ d_times='define' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -789,6 +792,7 @@ i_ustat='undef' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='' inc_version_list='' diff --git a/win32/config.gc b/win32/config.gc index 64cc3ca76b..66de52d7c0 100644 --- a/win32/config.gc +++ b/win32/config.gc @@ -480,6 +480,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -586,6 +587,8 @@ d_times='define' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='define' d_truncate='undef' d_truncl='define' @@ -801,6 +804,7 @@ i_ustat='undef' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='' inc_version_list='' diff --git a/win32/config.vc b/win32/config.vc index e44c31a143..5ece80459a 100644 --- a/win32/config.vc +++ b/win32/config.vc @@ -480,6 +480,7 @@ d_sethostent_r='undef' d_setitimer='undef' d_setlinebuf='undef' d_setlocale='define' +d_setlocale_accepts_any_locale_name='undef' d_setlocale_r='undef' d_setnent='undef' d_setnetent_r='undef' @@ -586,6 +587,8 @@ d_times='define' d_tm_tm_gmtoff='undef' d_tm_tm_zone='undef' d_tmpnam_r='undef' +d_towlower='undef' +d_towupper='undef' d_trunc='undef' d_truncate='undef' d_truncl='undef' @@ -800,6 +803,7 @@ i_ustat='undef' i_utime='define' i_vfork='undef' i_wchar='undef' +i_wctype='undef' i_xlocale='undef' ignore_versioned_solibs='' inc_version_list='' -- Perl5 Master Repository
