In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/992001bfb28aa89a918dfb566d0413ea40d9b0f5?hp=7ce1b4c45d9211e3a1d1630a5d0294b7f86ea037>
- Log ----------------------------------------------------------------- commit 992001bfb28aa89a918dfb566d0413ea40d9b0f5 Author: Karl Williamson <[email protected]> Date: Mon Sep 7 22:18:55 2015 -0600 Slightly shorten most regex patterns A compiled pattern requires a byte for each non-default modifier, like /i. Previously, the worst case was presumed in allocating the space (every modifier being non-default). Now, only the actual needed space is reserved. M globvar.sym M regcomp.c commit 308482c27259302fb2ca8c60b8383609a0e9f314 Author: Karl Williamson <[email protected]> Date: Mon Sep 7 10:03:27 2015 -0600 t/loc_tools.pl: Fix some bugs in locales_enabled() This code assumed that all locale categories were represented by non-negative whole numbers. However, it turns out that this assumption is wrong, as on AIX, LC_ALL is -1. This commit changes our assumption to take into account that reality; it now assumes that all categories are larger than a much more negative number, and now the new assumption is tested for, and if wrong, the code dies instead of silently doing the wrong thing. There was also a bug where if a locale category wasn't defined on the machine, but the corresponding #ifdef for using that category was still set, the category was improperly assumed to exist M lib/locale.t M t/loc_tools.pl commit d8f8a4817f5910267c45439ddb7764b371f06276 Author: Karl Williamson <[email protected]> Date: Tue Sep 8 09:39:18 2015 -0600 lib/locale.t: Use 'chomp' not 'chop' M lib/locale.t commit cf82bb84f419f9ee1d5beb2aaa94af3a6b9d5d44 Author: Karl Williamson <[email protected]> Date: Tue Sep 8 09:45:46 2015 -0600 lib/locale.t: sub ok() returns pass/fail This file rolls its own TAP, and it did not have its ok() return pass/fail. M lib/locale.t commit 51cdbd7cf1dd4d8b5b25f19d5d60fa1f1b672468 Author: Karl Williamson <[email protected]> Date: Sun Sep 6 10:24:45 2015 -0600 lib/locale.pm: Add an assertion It turns out that the code assumes that the values for LC_CTYPE, LC_MESSAGES, ... are small non-negative numbers, as a bit position is reserved for each of these. It's better to make this assumption explicit rather than getting hard-to-find failures. (LC_ALL doesn't have to be of this form, and is in fact -1 on AIX) M lib/locale.pm commit bbc981342c254b86d5bc82e5175169b68f0e59ce Author: Karl Williamson <[email protected]> Date: Fri May 8 15:19:56 2015 -0600 Add more -DL debugging info This adds more stuff that gets dumped when debugging locale handling. And it adds even more when the v modifier appears. M ext/POSIX/POSIX.xs M locale.c M perl.h commit 5d1187d1639ce42a8a9283c8282136fa16d41e50 Author: Karl Williamson <[email protected]> Date: Tue Sep 8 09:53:48 2015 -0600 Add code for debugging locale initialization This initialization is done before the processing of command line arguments, so that it has to be handled specially. This commit changes the initialization code to output debugging information if the environment variable PERL_DEBUG_LOCALE_INIT is set. I don't see the need to document this outside the source, as anyone who is using it would be reading the source anyway; it's of highly specialized use. M embed.fnc M embed.h M locale.c M makedef.pl M proto.h commit 6b058d4267db1fda2ada9ef3729c7477bbfa07c6 Author: Karl Williamson <[email protected]> Date: Tue Sep 8 09:52:57 2015 -0600 locale.c: Add clarifying comments M locale.c ----------------------------------------------------------------------- Summary of changes: embed.fnc | 6 ++ embed.h | 3 + ext/POSIX/POSIX.xs | 21 ++++- globvar.sym | 1 + lib/locale.pm | 14 ++- lib/locale.t | 17 +++- locale.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++------ makedef.pl | 1 + perl.h | 5 + proto.h | 6 ++ regcomp.c | 16 ++-- t/loc_tools.pl | 57 +++++++++--- 12 files changed, 357 insertions(+), 59 deletions(-) diff --git a/embed.fnc b/embed.fnc index ca6a5c7..f1abcd0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2472,8 +2472,14 @@ s |char* |stdize_locale |NN char* locs #if defined(USE_LOCALE) \ && (defined(PERL_IN_LOCALE_C) || defined (PERL_EXT_POSIX)) ApM |bool |_is_cur_LC_category_utf8|int category +# ifdef DEBUGGING +AMnPpR |char * |_setlocale_debug_string|const int category \ + |NULLOK const char* const locale \ + |NULLOK const char* const retval +# endif #endif + #if defined(PERL_IN_UTIL_C) s |SV* |mess_alloc s |SV * |with_queued_errors|NN SV *ex diff --git a/embed.h b/embed.h index faa4112..3f6515f 100644 --- a/embed.h +++ b/embed.h @@ -781,6 +781,9 @@ #if defined(DEBUGGING) #define pad_setsv(a,b) Perl_pad_setsv(aTHX_ a,b) #define pad_sv(a) Perl_pad_sv(aTHX_ a) +# if defined(USE_LOCALE) && (defined(PERL_IN_LOCALE_C) || defined (PERL_EXT_POSIX)) +#define _setlocale_debug_string Perl__setlocale_debug_string +# endif #endif #if defined(HAS_SIGACTION) && defined(SA_SIGINFO) #define csighandler Perl_csighandler diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 7d76af3..02c5c47 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -2269,6 +2269,9 @@ setlocale(category, locale = 0) #else retval = setlocale(category, locale); #endif + DEBUG_L(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(category, locale, retval))); if (! retval) { /* Should never happen that a query would return an error, but be * sure and reset to C locale */ @@ -2298,8 +2301,12 @@ setlocale(category, locale = 0) { char *newctype; #ifdef LC_ALL - if (category == LC_ALL) + if (category == LC_ALL) { newctype = setlocale(LC_CTYPE, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(LC_CTYPE, NULL, newctype))); + } else #endif newctype = RETVAL; @@ -2315,8 +2322,12 @@ setlocale(category, locale = 0) { char *newcoll; #ifdef LC_ALL - if (category == LC_ALL) + if (category == LC_ALL) { newcoll = setlocale(LC_COLLATE, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(LC_COLLATE, NULL, newcoll))); + } else #endif newcoll = RETVAL; @@ -2332,8 +2343,12 @@ setlocale(category, locale = 0) { char *newnum; #ifdef LC_ALL - if (category == LC_ALL) + if (category == LC_ALL) { newnum = setlocale(LC_NUMERIC, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(LC_NUMERIC, NULL, newnum))); + } else #endif newnum = RETVAL; diff --git a/globvar.sym b/globvar.sym index 1183d67..2943fc6 100644 --- a/globvar.sym +++ b/globvar.sym @@ -5,6 +5,7 @@ PL_No PL_Yes PL_bincompat_options +PL_bitcount PL_block_type PL_charclass PL_check diff --git a/lib/locale.pm b/lib/locale.pm index 9cc243f..53c01ff 100644 --- a/lib/locale.pm +++ b/lib/locale.pm @@ -102,10 +102,20 @@ sub import { } # Map our names to the ones defined by POSIX - $arg = "LC_" . uc($arg); + my $LC = "LC_" . uc($arg); - my $bit = eval "&POSIX::$arg"; + my $bit = eval "&POSIX::$LC"; if (defined $bit) { + + # Verify our assumption. + if (! ($bit >= 0 && $bit < 31)) { + require Carp; + Carp::croak("Cannot have ':$arg' parameter to 'use locale'" + . " on this platform. Use the 'perlbug' utility" + . " to report this problem, or send email to" + . " 'perlbug\@perl.org'. $LC=$bit"); + } + # 1 is added so that the pseudo-category :characters, which is # -1, comes out 0. $^H{locale} |= 1 << ($bit + 1); diff --git a/lib/locale.t b/lib/locale.t index 1b510d2..6b5616c 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -63,7 +63,7 @@ my $dumper = Dumpvalue->new( sub debug { return unless $debug; my($mess) = join "", '# ', @_; - chop $mess; + chomp $mess; print STDERR $dumper->stringify($mess,1), "\n"; } @@ -88,6 +88,7 @@ sub ok { print "ok " . ++$test_num; print " $message"; print "\n"; + return ($result) ? 1 : 0; } # First we'll do a lot of taint checking for locales. @@ -115,6 +116,20 @@ sub check_taint_not ($;$) { ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); } +foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { + my $short_result = locales_enabled($category); + ok ($short_result == 0 || $short_result == 1, + "Verify locales_enabled('$category') returns 0 or 1"); + debug("locales_enabled('$category') returned '$short_result'"); + my $long_result = locales_enabled("LC_$category"); + if (! ok ($long_result == $short_result, + " and locales_enabled('LC_$category') returns " + . "the same value") + ) { + debug("locales_enabled('LC_$category') returned $long_result"); + } +} + "\tb\t" =~ /^m?(\s)(.*)\1$/; check_taint_not $&, "not tainted outside 'use locale'"; ; diff --git a/locale.c b/locale.c index 9b0979d..d1ea74c 100644 --- a/locale.c +++ b/locale.c @@ -569,6 +569,8 @@ Perl_my_setlocale(pTHX_ int category, const char* locale) } result = setlocale(category, locale); + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(category, locale, result))); if (! override_LC_ALL) { return result; @@ -583,41 +585,63 @@ Perl_my_setlocale(pTHX_ int category, const char* locale) result = PerlEnv_getenv("LC_TIME"); if (result && strNE(result, "")) { setlocale(LC_TIME, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_TIME, result, "not captured"))); } # endif # ifdef USE_LOCALE_CTYPE result = PerlEnv_getenv("LC_CTYPE"); if (result && strNE(result, "")) { setlocale(LC_CTYPE, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_CTYPE, result, "not captured"))); } # endif # ifdef USE_LOCALE_COLLATE result = PerlEnv_getenv("LC_COLLATE"); if (result && strNE(result, "")) { setlocale(LC_COLLATE, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_COLLATE, result, "not captured"))); } # endif # ifdef USE_LOCALE_MONETARY result = PerlEnv_getenv("LC_MONETARY"); if (result && strNE(result, "")) { setlocale(LC_MONETARY, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_MONETARY, result, "not captured"))); } # endif # ifdef USE_LOCALE_NUMERIC result = PerlEnv_getenv("LC_NUMERIC"); if (result && strNE(result, "")) { setlocale(LC_NUMERIC, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_NUMERIC, result, "not captured"))); } # endif # ifdef USE_LOCALE_MESSAGES result = PerlEnv_getenv("LC_MESSAGES"); if (result && strNE(result, "")) { setlocale(LC_MESSAGES, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_MESSAGES, result, "not captured"))); } # endif - return setlocale(LC_ALL, NULL); + result = setlocale(LC_ALL, NULL); + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_ALL, NULL, result))); + return result; } #endif @@ -639,7 +663,42 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * 1 = set ok or not applicable, * 0 = fallback to a locale of lower priority * -1 = fallback to all locales failed, not even to the C locale - */ + * + * Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is + * set, debugging information is output. + * + * This looks more complicated than it is, mainly due to the #ifdefs. + * + * We try to set LC_ALL to the value determined by the environment. If + * there is no LC_ALL on this platform, we try the individual categories we + * know about. If this works, we are done. + * + * But if it doesn't work, we have to do something else. We search the + * environment variables ourselves instead of relying on the system to do + * it. We look at, in order, LC_ALL, LANG, a system default locale (if we + * think there is one), and the ultimate fallback "C". This is all done in + * the same loop as above to avoid duplicating code, but it makes things + * more complex. After the original failure, we add the fallback + * possibilities to the list of locales to try, and iterate the loop + * through them all until one succeeds. + * + * On Ultrix, the locale MUST come from the environment, so there is + * preliminary code to set it. I (khw) am not sure that it is necessary, + * and that this couldn't be folded into the loop, but barring any real + * platforms to test on, it's staying as-is + * + * A slight complication is that in embedded Perls, the locale may already + * be set-up, and we don't want to get it from the normal environment + * variables. This is handled by having a special environment variable + * indicate we're in this situation. We simply set setlocale's 2nd + * parameter to be a NULL instead of "". That indicates to setlocale that + * it is not to change anything, but to return the current value, + * effectively initializing perl's db to what the locale already is. + * + * We play the same trick with NULL if a LC_ALL succeeds. We call + * setlocale() on the individual categores with NULL to get their existing + * values for our db, instead of trying to change them. + * */ int ok = 1; @@ -661,6 +720,24 @@ Perl_init_i18nl10n(pTHX_ int printwarn) const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) ? NULL : ""; +#ifdef DEBUGGING + const bool debug = (PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")) + ? TRUE + : FALSE; +# define DEBUG_LOCALE_INIT(category, locale, result) \ + STMT_START { \ + if (debug) { \ + PerlIO_printf(Perl_debug_log, \ + "%s:%d: %s\n", \ + __FILE__, __LINE__, \ + _setlocale_debug_string(category, \ + locale, \ + result)); \ + } \ + } STMT_END +#else +# define DEBUG_LOCALE_INIT(a,b,c) +#endif const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */ unsigned int trial_locales_count; const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL")); @@ -680,6 +757,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) *bad_lang_use_once && strNE("0", bad_lang_use_once))))); bool done = FALSE; + char * sl_result; /* return from setlocale() */ + char * locale_param; #ifdef WIN32 /* In some systems you can find out the system default locale * and use that as the fallback locale. */ @@ -691,6 +770,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifndef LOCALE_ENVIRON_REQUIRED PERL_UNUSED_VAR(done); + PERL_UNUSED_VAR(locale_param); #else /* @@ -700,52 +780,64 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # ifdef LC_ALL if (lang) { - if (my_setlocale(LC_ALL, setlocale_init)) + sl_result = my_setlocale(LC_ALL, setlocale_init); + DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result); + if (sl_result) done = TRUE; else setlocale_failure = TRUE; } - if (!setlocale_failure) { + if (! setlocale_failure) { # ifdef USE_LOCALE_CTYPE - if (! (curctype = - my_setlocale(LC_CTYPE, - (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_CTYPE"))) + ? setlocale_init + : NULL; + curctype = my_setlocale(LC_CTYPE, locale_param); + DEBUG_LOCALE_INIT(LC_CTYPE, locale_param, sl_result); + if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); # endif /* USE_LOCALE_CTYPE */ # ifdef USE_LOCALE_COLLATE - if (! (curcoll = - my_setlocale(LC_COLLATE, - (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_COLLATE"))) + ? setlocale_init + : NULL; + curcoll = my_setlocale(LC_COLLATE, locale_param); + DEBUG_LOCALE_INIT(LC_COLLATE, locale_param, sl_result); + if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); # endif /* USE_LOCALE_COLLATE */ # ifdef USE_LOCALE_NUMERIC - if (! (curnum = - my_setlocale(LC_NUMERIC, - (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_NUMERIC"))) + ? setlocale_init + : NULL; + curnum = my_setlocale(LC_NUMERIC, locale_param); + DEBUG_LOCALE_INIT(LC_NUMERIC, locale_param, sl_result); + if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); # endif /* USE_LOCALE_NUMERIC */ # ifdef USE_LOCALE_MESSAGES - if (! my_setlocale(LC_MESSAGES, - (!done && (lang || PerlEnv_getenv("LC_MESSAGES"))) - ? setlocale_init : NULL)) - { + locale_param = (! done && (lang || PerlEnv_getenv("LC_MESSAGES"))) + ? setlocale_init + : NULL; + sl_result = my_setlocale(LC_MESSAGES, locale_param); + DEBUG_LOCALE_INIT(LC_MESSAGES, locale_param, sl_result); + if (! sl_result) setlocale_failure = TRUE; } # endif /* USE_LOCALE_MESSAGES */ # ifdef USE_LOCALE_MONETARY - if (! my_setlocale(LC_MONETARY, - (!done && (lang || PerlEnv_getenv("LC_MONETARY"))) - ? setlocale_init : NULL)) - { + locale_param = (! done && (lang || PerlEnv_getenv("LC_MONETARY"))) + ? setlocale_init + : NULL; + sl_result = my_setlocale(LC_MONETARY, locale_param); + DEBUG_LOCALE_INIT(LC_MONETARY, locale_param, sl_result); + if (! sl_result) { setlocale_failure = TRUE; } # endif /* USE_LOCALE_MONETARY */ @@ -781,6 +873,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Note that this may change the locale, but we are going to do * that anyway just below */ system_default_locale = setlocale(LC_ALL, ""); + DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale); /* Skip if invalid or it's already on the list of locales to * try */ @@ -800,7 +893,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } #ifdef LC_ALL - if (! my_setlocale(LC_ALL, trial_locale)) { + sl_result = my_setlocale(LC_ALL, trial_locale); + DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result); + if (! sl_result) { setlocale_failure = TRUE; } else { @@ -818,31 +913,41 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE Safefree(curctype); - if (! (curctype = my_setlocale(LC_CTYPE, trial_locale))) + curctype = my_setlocale(LC_CTYPE, trial_locale); + DEBUG_LOCALE_INIT(LC_CTYPE, trial_locale, curctype); + if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE Safefree(curcoll); - if (! (curcoll = my_setlocale(LC_COLLATE, trial_locale))) + curcoll = my_setlocale(LC_COLLATE, trial_locale); + DEBUG_LOCALE_INIT(LC_COLLATE, trial_locale, curcoll); + if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC Safefree(curnum); - if (! (curnum = my_setlocale(LC_NUMERIC, trial_locale))) + curnum = my_setlocale(LC_NUMERIC, trial_locale); + DEBUG_LOCALE_INIT(LC_NUMERIC, trial_locale, curnum); + if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); #endif /* USE_LOCALE_NUMERIC */ #ifdef USE_LOCALE_MESSAGES - if (! (my_setlocale(LC_MESSAGES, trial_locale))) + sl_result = my_setlocale(LC_MESSAGES, trial_locale); + DEBUG_LOCALE_INIT(LC_MESSAGES, trial_locale, sl_result); + if (! (sl_result)) setlocale_failure = TRUE; #endif /* USE_LOCALE_MESSAGES */ #ifdef USE_LOCALE_MONETARY - if (! (my_setlocale(LC_MONETARY, trial_locale))) + sl_result = my_setlocale(LC_MONETARY, trial_locale); + DEBUG_LOCALE_INIT(LC_MONETARY, trial_locale, sl_result); + if (! (sl_result)) setlocale_failure = TRUE; #endif /* USE_LOCALE_MONETARY */ @@ -935,7 +1040,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * LANG, and the C locale. We don't try the same locale twice, so * don't add to the list if already there. (On POSIX systems, the * LC_ALL element will likely be a repeat of the 0th element "", - * but there's no harm done by doing it explicitly */ + * but there's no harm done by doing it explicitly. + * + * Note that this tries the LC_ALL environment variable even on + * systems which have no LC_ALL locale setting. This may or may + * not have been originally intentional, but there's no real need + * to change the behavior. */ if (lc_all) { for (j = 0; j < trial_locales_count; j++) { if (strEQ(lc_all, trial_locales[j])) { @@ -1000,14 +1110,17 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifdef USE_LOCALE_CTYPE Safefree(curctype); curctype = savepv(setlocale(LC_CTYPE, NULL)); + DEBUG_LOCALE_INIT(LC_CTYPE, NULL, curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE Safefree(curcoll); curcoll = savepv(setlocale(LC_COLLATE, NULL)); + DEBUG_LOCALE_INIT(LC_COLLATE, NULL, curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC Safefree(curnum); curnum = savepv(setlocale(LC_NUMERIC, NULL)); + DEBUG_LOCALE_INIT(LC_NUMERIC, NULL, curnum); #endif /* USE_LOCALE_NUMERIC */ } @@ -1749,6 +1862,100 @@ Perl_sync_locale(pTHX) } +#if defined(DEBUGGING) && defined(USE_LOCALE) + +char * +Perl__setlocale_debug_string(const int category, /* category number, + like LC_ALL */ + const char* const locale, /* locale name */ + + /* return value from setlocale() when attempting to + * set 'category' to 'locale' */ + const char* const retval) +{ + /* Returns a pointer to a NUL-terminated string in static storage with + * added text about the info passed in. This is not thread safe and will + * be overwritten by the next call, so this should be used just to + * formulate a string to immediately print or savepv() on. + * + * Buffer overflow checking is done only after the fact (via an assert), + * because this is used only in DEBUGGING, and an attacker would have to + * control the start up of perl with the correct environment variable or + * command line option. */ + + static char ret[128] = ""; + + strcpy(ret, "setlocale("); + + switch (category) { + default: + sprintf(ret, "%s? %d", ret, category); + break; +# ifdef LC_ALL + case LC_ALL: + strcat(ret, "LC_ALL"); + break; +# endif +# ifdef LC_CTYPE + case LC_CTYPE: + strcat(ret, "LC_CTYPE"); + break; +# endif +# ifdef LC_NUMERIC + case LC_NUMERIC: + strcat(ret, "LC_NUMERIC"); + break; +# endif +# ifdef LC_COLLATE + case LC_COLLATE: + strcat(ret, "LC_COLLATE"); + break; +# endif +# ifdef LC_TIME + case LC_TIME: + strcat(ret, "LC_TIME"); + break; +# endif +# ifdef LC_MONETARY + case LC_MONETARY: + strcat(ret, "LC_MONETARY"); + break; +# endif +# ifdef LC_MESSAGES + case LC_MESSAGES: + strcat(ret, "LC_MESSAGES"); + break; +# endif + } + + strcat(ret, ", "); + + if (locale) { + strcat(ret, "\""); + strcat(ret, locale); + strcat(ret, "\""); + } + else { + strcat(ret, "NULL"); + } + + strcat(ret, ") returned "); + + if (retval) { + strcat(ret, "\""); + strcat(ret, retval); + strcat(ret, "\""); + } + else { + strcat(ret, "NULL"); + } + + assert(strlen(ret) < sizeof(ret)); + + return ret; +} + +#endif /* diff --git a/makedef.pl b/makedef.pl index 1fee334..d1adad0 100644 --- a/makedef.pl +++ b/makedef.pl @@ -253,6 +253,7 @@ unless ($define{'DEBUGGING'}) { Perl_debstackptrs Perl_pad_sv Perl_pad_setsv + Perl__setlocale_debug_string Perl_set_padlist Perl_hv_assert PL_watchaddr diff --git a/perl.h b/perl.h index b040291..cb877a3 100644 --- a/perl.h +++ b/perl.h @@ -4055,6 +4055,7 @@ Gid_t getegid (void); # define DEBUG_Xv_TEST_ (DEBUG_X_TEST_ && DEBUG_v_TEST_) # define DEBUG_Uv_TEST_ (DEBUG_U_TEST_ && DEBUG_v_TEST_) # define DEBUG_Pv_TEST_ (DEBUG_P_TEST_ && DEBUG_v_TEST_) +# define DEBUG_Lv_TEST_ (DEBUG_L_TEST_ && DEBUG_v_TEST_) #ifdef DEBUGGING @@ -4088,6 +4089,7 @@ Gid_t getegid (void); # define DEBUG_Xv_TEST DEBUG_Xv_TEST_ # define DEBUG_Uv_TEST DEBUG_Uv_TEST_ # define DEBUG_Pv_TEST DEBUG_Pv_TEST_ +# define DEBUG_Lv_TEST DEBUG_Lv_TEST_ # define PERL_DEB(a) a # define PERL_DEB2(a,b) a @@ -4127,6 +4129,7 @@ Gid_t getegid (void); # define DEBUG_Xv(a) DEBUG__(DEBUG_Xv_TEST, a) # define DEBUG_Uv(a) DEBUG__(DEBUG_Uv_TEST, a) # define DEBUG_Pv(a) DEBUG__(DEBUG_Pv_TEST, a) +# define DEBUG_Lv(a) DEBUG__(DEBUG_Lv_TEST, a) # define DEBUG_S(a) DEBUG__(DEBUG_S_TEST, a) # define DEBUG_T(a) DEBUG__(DEBUG_T_TEST, a) @@ -4171,6 +4174,7 @@ Gid_t getegid (void); # define DEBUG_Xv_TEST (0) # define DEBUG_Uv_TEST (0) # define DEBUG_Pv_TEST (0) +# define DEBUG_Lv_TEST (0) # define PERL_DEB(a) # define PERL_DEB2(a,b) b @@ -4204,6 +4208,7 @@ Gid_t getegid (void); # define DEBUG_Xv(a) # define DEBUG_Uv(a) # define DEBUG_Pv(a) +# define DEBUG_Lv(a) #endif /* DEBUGGING */ diff --git a/proto.h b/proto.h index 0f4e7a0..4d3465f 100644 --- a/proto.h +++ b/proto.h @@ -3828,6 +3828,12 @@ STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp); #define PERL_ARGS_ASSERT_TOKEREPORT \ assert(lvalp) # endif +# if defined(USE_LOCALE) && (defined(PERL_IN_LOCALE_C) || defined (PERL_EXT_POSIX)) +PERL_CALLCONV char * Perl__setlocale_debug_string(const int category, const char* const locale, const char* const retval) + __attribute__warn_unused_result__ + __attribute__pure__; + +# endif #endif #if defined(DEBUG_LEAKING_SCALARS_FORK_DUMP) PERL_CALLCONV void Perl_dump_sv_child(pTHX_ SV *sv); diff --git a/regcomp.c b/regcomp.c index ccbccf8..24af9d7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6794,25 +6794,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, || ! has_charset); bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN) == REG_RUN_ON_COMMENT_SEEN); - U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) + U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD) >> RXf_PMf_STD_PMMOD_SHIFT); const char *fptr = STD_PAT_MODS; /*"msixn"*/ char *p; - /* Allocate for the worst case, which is all the std flags are turned - * on. If more precision is desired, we could do a population count of - * the flags set. This could be done with a small lookup table, or by - * shifting, masking and adding, or even, when available, assembly - * language for a machine-language population count. - * We never output a minus, as all those are defaults, so are + + /* We output all the necessary flags; we never output a minus, as all + * those are defaults, so are * covered by the caret */ const STRLEN wraplen = plen + has_p + has_runon + has_default /* If needs a caret */ + + PL_bitcount[reganch] /* 1 char for each set standard flag */ /* If needs a character set specifier */ + ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0) - + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); + /* make sure PL_bitcount bounds not exceeded */ + assert(sizeof(STD_PAT_MODS) <= 8); + Newx(p, wraplen + 1, char); /* +1 for the ending NUL */ r->xpv_len_u.xpvlenu_pv = p; if (RExC_utf8) diff --git a/t/loc_tools.pl b/t/loc_tools.pl index 541e08f..86d8e48 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -80,20 +80,38 @@ sub _decode_encodings { # For use only by other functions in this file! return @enc; } +# LC_ALL can be -1 on some platforms. And, in fact the implementors could +# legally use any integer to represent any category. But it makes the most +# sense for them to have used small integers. Below, we create new locale +# numbers for ones missing from this machine. We make them very negative, +# hopefully more negative than anything likely to be a valid category on the +# platform, but also below is a check to be sure that our guess is valid. +my $max_bad_category_number = -1000000; + # Initialize this hash so that it looks like e.g., # 6 => 'CTYPE', # where 6 is the value of &POSIX::LC_CTYPE my %category_name; eval { require POSIX; import POSIX 'locale_h'; }; unless ($@) { - my $number_for_missing_category = 0; + my $number_for_missing_category = $max_bad_category_number; foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { my $number = eval "&POSIX::LC_$name"; - # Use a negative number if the platform doesn't support this category, - # so we have an entry for all ones that might be specified in calls to - # us. - $number = --$number_for_missing_category if $@; + if ($@) { + # Use a negative number (smaller than any legitimate category + # number) if the platform doesn't support this category, so we + # have an entry for all the ones that might be specified in calls + # to us. + $number = $number_for_missing_category-- if $@; + } + elsif ( $number !~ / ^ -? \d+ $ /x + || $number <= $max_bad_category_number) + { + # We think this should be an int. And it has to be larger than + # any of our synthetic numbers. + die "Unexpected locale category number '$number' for LC_$name" + } $category_name{$number} = "$name"; } @@ -129,19 +147,30 @@ sub locales_enabled(;$) { if (defined $categories_ref) { $categories_ref = [ $categories_ref ] if ! ref $categories_ref; my @local_categories_copy = @$categories_ref; - for my $category (@local_categories_copy) { - if ($category =~ / ^ -? \d+ $ /x) { - die "Invalid locale category number '$category'" - unless grep { $category == $_ } keys %category_name; - $category = $category_name{$category}; + for my $category_name_or_number (@local_categories_copy) { + my $name; + my $number; + if ($category_name_or_number =~ / ^ -? \d+ $ /x) { + $number = $category_name_or_number; + die "Invalid locale category number '$number'" + unless grep { $number == $_ } keys %category_name; + $name = $category_name{$number}; } else { - $category =~ s/ ^ LC_ //x; - die "Invalid locale category name '$category'" - unless grep { $category eq $_ } values %category_name; + $name = $category_name_or_number; + $name =~ s/ ^ LC_ //x; + foreach my $trial (keys %category_name) { + if ($category_name{$trial} eq $name) { + $number = $trial; + last; + } + } + die "Invalid locale category name '$name'" + unless defined $number; } - return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE_$category\b/; + return 0 if $number <= $max_bad_category_number + || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/; } } -- Perl5 Master Repository
