In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/992001bfb28aa89a918dfb566d0413ea40d9b0f5?hp=7ce1b4c45d9211e3a1d1630a5d0294b7f86ea037>

- Log -----------------------------------------------------------------
commit 992001bfb28aa89a918dfb566d0413ea40d9b0f5
Author: Karl Williamson <[email protected]>
Date:   Mon Sep 7 22:18:55 2015 -0600

    Slightly shorten most regex patterns
    
    A compiled pattern requires a byte for each non-default modifier, like
    /i.  Previously, the worst case was presumed in allocating the space
    (every modifier being non-default).  Now, only the actual needed space
    is reserved.

M       globvar.sym
M       regcomp.c

commit 308482c27259302fb2ca8c60b8383609a0e9f314
Author: Karl Williamson <[email protected]>
Date:   Mon Sep 7 10:03:27 2015 -0600

    t/loc_tools.pl: Fix some bugs in locales_enabled()
    
    This code assumed that all locale categories were represented by
    non-negative whole numbers.  However, it turns out that this assumption
    is wrong, as on AIX, LC_ALL is -1.  This commit changes our assumption to
    take into account that reality; it now assumes that all categories are
    larger than a much more negative number, and now the new assumption is
    tested for, and if wrong, the code dies instead of silently doing the
    wrong thing.
    
    There was also a bug where if a locale category wasn't defined on the
    machine, but the corresponding #ifdef for using that category was still
    set, the category was improperly assumed to exist

M       lib/locale.t
M       t/loc_tools.pl

commit d8f8a4817f5910267c45439ddb7764b371f06276
Author: Karl Williamson <[email protected]>
Date:   Tue Sep 8 09:39:18 2015 -0600

    lib/locale.t: Use 'chomp' not 'chop'

M       lib/locale.t

commit cf82bb84f419f9ee1d5beb2aaa94af3a6b9d5d44
Author: Karl Williamson <[email protected]>
Date:   Tue Sep 8 09:45:46 2015 -0600

    lib/locale.t: sub ok() returns pass/fail
    
    This file rolls its own TAP, and it did not have its ok() return
    pass/fail.

M       lib/locale.t

commit 51cdbd7cf1dd4d8b5b25f19d5d60fa1f1b672468
Author: Karl Williamson <[email protected]>
Date:   Sun Sep 6 10:24:45 2015 -0600

    lib/locale.pm: Add an assertion
    
    It turns out that the code assumes that the values for LC_CTYPE,
    LC_MESSAGES, ...  are small non-negative numbers, as a bit position is
    reserved for each of these.  It's better to make this assumption
    explicit rather than getting hard-to-find failures.
    
    (LC_ALL doesn't have to be of this form, and is in fact -1 on AIX)

M       lib/locale.pm

commit bbc981342c254b86d5bc82e5175169b68f0e59ce
Author: Karl Williamson <[email protected]>
Date:   Fri May 8 15:19:56 2015 -0600

    Add more -DL debugging info
    
    This adds more stuff that gets dumped when debugging locale handling.
    And it adds even more when the v modifier appears.

M       ext/POSIX/POSIX.xs
M       locale.c
M       perl.h

commit 5d1187d1639ce42a8a9283c8282136fa16d41e50
Author: Karl Williamson <[email protected]>
Date:   Tue Sep 8 09:53:48 2015 -0600

    Add code for debugging locale initialization
    
    This initialization is done before the processing of command line
    arguments, so that it has to be handled specially.  This commit changes
    the initialization code to output debugging information if the
    environment variable PERL_DEBUG_LOCALE_INIT is set.
    
    I don't see the need to document this outside the source, as anyone who
    is using it would be reading the source anyway; it's of highly
    specialized use.

M       embed.fnc
M       embed.h
M       locale.c
M       makedef.pl
M       proto.h

commit 6b058d4267db1fda2ada9ef3729c7477bbfa07c6
Author: Karl Williamson <[email protected]>
Date:   Tue Sep 8 09:52:57 2015 -0600

    locale.c: Add clarifying comments

M       locale.c
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc          |   6 ++
 embed.h            |   3 +
 ext/POSIX/POSIX.xs |  21 ++++-
 globvar.sym        |   1 +
 lib/locale.pm      |  14 ++-
 lib/locale.t       |  17 +++-
 locale.c           | 269 +++++++++++++++++++++++++++++++++++++++++++++++------
 makedef.pl         |   1 +
 perl.h             |   5 +
 proto.h            |   6 ++
 regcomp.c          |  16 ++--
 t/loc_tools.pl     |  57 +++++++++---
 12 files changed, 357 insertions(+), 59 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index ca6a5c7..f1abcd0 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2472,8 +2472,14 @@ s        |char*  |stdize_locale  |NN char* locs
 #if defined(USE_LOCALE) \
     && (defined(PERL_IN_LOCALE_C) || defined (PERL_EXT_POSIX))
 ApM    |bool   |_is_cur_LC_category_utf8|int category
+#      ifdef DEBUGGING
+AMnPpR |char * |_setlocale_debug_string|const int category                 \
+                                       |NULLOK const char* const locale    \
+                                       |NULLOK const char* const retval
+#      endif
 #endif
 
+
 #if defined(PERL_IN_UTIL_C)
 s      |SV*    |mess_alloc
 s      |SV *   |with_queued_errors|NN SV *ex
diff --git a/embed.h b/embed.h
index faa4112..3f6515f 100644
--- a/embed.h
+++ b/embed.h
@@ -781,6 +781,9 @@
 #if defined(DEBUGGING)
 #define pad_setsv(a,b)         Perl_pad_setsv(aTHX_ a,b)
 #define pad_sv(a)              Perl_pad_sv(aTHX_ a)
+#  if defined(USE_LOCALE)     && (defined(PERL_IN_LOCALE_C) || defined 
(PERL_EXT_POSIX))
+#define _setlocale_debug_string        Perl__setlocale_debug_string
+#  endif
 #endif
 #if defined(HAS_SIGACTION) && defined(SA_SIGINFO)
 #define csighandler            Perl_csighandler
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 7d76af3..02c5c47 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -2269,6 +2269,9 @@ setlocale(category, locale = 0)
 #else
        retval = setlocale(category, locale);
 #endif
+        DEBUG_L(PerlIO_printf(Perl_debug_log,
+            "%s:%d: %s\n", __FILE__, __LINE__,
+                _setlocale_debug_string(category, locale, retval)));
        if (! retval) {
             /* Should never happen that a query would return an error, but be
              * sure and reset to C locale */
@@ -2298,8 +2301,12 @@ setlocale(category, locale = 0)
            {
                char *newctype;
 #ifdef LC_ALL
-               if (category == LC_ALL)
+               if (category == LC_ALL) {
                    newctype = setlocale(LC_CTYPE, NULL);
+                    DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                        "%s:%d: %s\n", __FILE__, __LINE__,
+                        _setlocale_debug_string(LC_CTYPE, NULL, newctype)));
+                }
                else
 #endif
                    newctype = RETVAL;
@@ -2315,8 +2322,12 @@ setlocale(category, locale = 0)
            {
                char *newcoll;
 #ifdef LC_ALL
-               if (category == LC_ALL)
+               if (category == LC_ALL) {
                    newcoll = setlocale(LC_COLLATE, NULL);
+                    DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                        "%s:%d: %s\n", __FILE__, __LINE__,
+                        _setlocale_debug_string(LC_COLLATE, NULL, newcoll)));
+                }
                else
 #endif
                    newcoll = RETVAL;
@@ -2332,8 +2343,12 @@ setlocale(category, locale = 0)
            {
                char *newnum;
 #ifdef LC_ALL
-               if (category == LC_ALL)
+               if (category == LC_ALL) {
                    newnum = setlocale(LC_NUMERIC, NULL);
+                    DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                        "%s:%d: %s\n", __FILE__, __LINE__,
+                        _setlocale_debug_string(LC_NUMERIC, NULL, newnum)));
+                }
                else
 #endif
                    newnum = RETVAL;
diff --git a/globvar.sym b/globvar.sym
index 1183d67..2943fc6 100644
--- a/globvar.sym
+++ b/globvar.sym
@@ -5,6 +5,7 @@
 PL_No
 PL_Yes
 PL_bincompat_options
+PL_bitcount
 PL_block_type
 PL_charclass
 PL_check
diff --git a/lib/locale.pm b/lib/locale.pm
index 9cc243f..53c01ff 100644
--- a/lib/locale.pm
+++ b/lib/locale.pm
@@ -102,10 +102,20 @@ sub import {
             }
 
             # Map our names to the ones defined by POSIX
-            $arg = "LC_" . uc($arg);
+            my $LC = "LC_" . uc($arg);
 
-            my $bit = eval "&POSIX::$arg";
+            my $bit = eval "&POSIX::$LC";
             if (defined $bit) {
+
+                # Verify our assumption.
+                if (! ($bit >= 0 && $bit < 31)) {
+                    require Carp;
+                    Carp::croak("Cannot have ':$arg' parameter to 'use locale'"
+                              . " on this platform.  Use the 'perlbug' utility"
+                              . " to report this problem, or send email to"
+                              . " 'perlbug\@perl.org'.  $LC=$bit");
+                }
+
                 # 1 is added so that the pseudo-category :characters, which is
                 # -1, comes out 0.
                 $^H{locale} |= 1 << ($bit + 1);
diff --git a/lib/locale.t b/lib/locale.t
index 1b510d2..6b5616c 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -63,7 +63,7 @@ my $dumper = Dumpvalue->new(
 sub debug {
   return unless $debug;
   my($mess) = join "", '# ', @_;
-  chop $mess;
+  chomp $mess;
   print STDERR $dumper->stringify($mess,1), "\n";
 }
 
@@ -88,6 +88,7 @@ sub ok {
     print "ok " . ++$test_num;
     print " $message";
     print "\n";
+    return ($result) ? 1 : 0;
 }
 
 # First we'll do a lot of taint checking for locales.
@@ -115,6 +116,20 @@ sub check_taint_not ($;$) {
     ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
 }
 
+foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
+    my $short_result = locales_enabled($category);
+    ok ($short_result == 0 || $short_result == 1,
+        "Verify locales_enabled('$category') returns 0 or 1");
+    debug("locales_enabled('$category') returned '$short_result'");
+    my $long_result = locales_enabled("LC_$category");
+    if (! ok ($long_result == $short_result,
+              "   and locales_enabled('LC_$category') returns "
+            . "the same value")
+    ) {
+        debug("locales_enabled('LC_$category') returned $long_result");
+    }
+}
+
 "\tb\t" =~ /^m?(\s)(.*)\1$/;
 check_taint_not   $&, "not tainted outside 'use locale'";
 ;
diff --git a/locale.c b/locale.c
index 9b0979d..d1ea74c 100644
--- a/locale.c
+++ b/locale.c
@@ -569,6 +569,8 @@ Perl_my_setlocale(pTHX_ int category, const char* locale)
     }
 
     result = setlocale(category, locale);
+    DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__,
+                            _setlocale_debug_string(category, locale, 
result)));
 
     if (! override_LC_ALL)  {
         return result;
@@ -583,41 +585,63 @@ Perl_my_setlocale(pTHX_ int category, const char* locale)
     result = PerlEnv_getenv("LC_TIME");
     if (result && strNE(result, "")) {
         setlocale(LC_TIME, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                    __FILE__, __LINE__,
+                    _setlocale_debug_string(LC_TIME, result, "not captured")));
     }
 #   endif
 #   ifdef USE_LOCALE_CTYPE
     result = PerlEnv_getenv("LC_CTYPE");
     if (result && strNE(result, "")) {
         setlocale(LC_CTYPE, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                    __FILE__, __LINE__,
+                    _setlocale_debug_string(LC_CTYPE, result, "not 
captured")));
     }
 #   endif
 #   ifdef USE_LOCALE_COLLATE
     result = PerlEnv_getenv("LC_COLLATE");
     if (result && strNE(result, "")) {
         setlocale(LC_COLLATE, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                  __FILE__, __LINE__,
+                  _setlocale_debug_string(LC_COLLATE, result, "not 
captured")));
     }
 #   endif
 #   ifdef USE_LOCALE_MONETARY
     result = PerlEnv_getenv("LC_MONETARY");
     if (result && strNE(result, "")) {
         setlocale(LC_MONETARY, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                 __FILE__, __LINE__,
+                 _setlocale_debug_string(LC_MONETARY, result, "not 
captured")));
     }
 #   endif
 #   ifdef USE_LOCALE_NUMERIC
     result = PerlEnv_getenv("LC_NUMERIC");
     if (result && strNE(result, "")) {
         setlocale(LC_NUMERIC, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                 __FILE__, __LINE__,
+                 _setlocale_debug_string(LC_NUMERIC, result, "not captured")));
     }
 #   endif
 #   ifdef USE_LOCALE_MESSAGES
     result = PerlEnv_getenv("LC_MESSAGES");
     if (result && strNE(result, "")) {
         setlocale(LC_MESSAGES, result);
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                 __FILE__, __LINE__,
+                 _setlocale_debug_string(LC_MESSAGES, result, "not 
captured")));
     }
 #   endif
 
-    return setlocale(LC_ALL, NULL);
+    result = setlocale(LC_ALL, NULL);
+    DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
+                               __FILE__, __LINE__,
+                               _setlocale_debug_string(LC_ALL, NULL, result)));
 
+    return result;
 }
 
 #endif
@@ -639,7 +663,42 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      *    1 = set ok or not applicable,
      *    0 = fallback to a locale of lower priority
      *   -1 = fallback to all locales failed, not even to the C locale
-     */
+     *
+     * Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is
+     * set, debugging information is output.
+     *
+     * This looks more complicated than it is, mainly due to the #ifdefs.
+     *
+     * We try to set LC_ALL to the value determined by the environment.  If
+     * there is no LC_ALL on this platform, we try the individual categories we
+     * know about.  If this works, we are done.
+     *
+     * But if it doesn't work, we have to do something else.  We search the
+     * environment variables ourselves instead of relying on the system to do
+     * it.  We look at, in order, LC_ALL, LANG, a system default locale (if we
+     * think there is one), and the ultimate fallback "C".  This is all done in
+     * the same loop as above to avoid duplicating code, but it makes things
+     * more complex.  After the original failure, we add the fallback
+     * possibilities to the list of locales to try, and iterate the loop
+     * through them all until one succeeds.
+     *
+     * On Ultrix, the locale MUST come from the environment, so there is
+     * preliminary code to set it.  I (khw) am not sure that it is necessary,
+     * and that this couldn't be folded into the loop, but barring any real
+     * platforms to test on, it's staying as-is
+     *
+     * A slight complication is that in embedded Perls, the locale may already
+     * be set-up, and we don't want to get it from the normal environment
+     * variables.  This is handled by having a special environment variable
+     * indicate we're in this situation.  We simply set setlocale's 2nd
+     * parameter to be a NULL instead of "".  That indicates to setlocale that
+     * it is not to change anything, but to return the current value,
+     * effectively initializing perl's db to what the locale already is.
+     *
+     * We play the same trick with NULL if a LC_ALL succeeds.  We call
+     * setlocale() on the individual categores with NULL to get their existing
+     * values for our db, instead of trying to change them.
+     * */
 
     int ok = 1;
 
@@ -661,6 +720,24 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
     const char * const setlocale_init = 
(PerlEnv_getenv("PERL_SKIP_LOCALE_INIT"))
                                         ? NULL
                                         : "";
+#ifdef DEBUGGING
+    const bool debug = (PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT"))
+                       ? TRUE
+                       : FALSE;
+#   define DEBUG_LOCALE_INIT(category, locale, result)                      \
+       STMT_START {                                                        \
+               if (debug) {                                                \
+                    PerlIO_printf(Perl_debug_log,                           \
+                                  "%s:%d: %s\n",                            \
+                                  __FILE__, __LINE__,                       \
+                                  _setlocale_debug_string(category,         \
+                                                          locale,           \
+                                                          result));         \
+                }                                                           \
+       } STMT_END
+#else
+#   define DEBUG_LOCALE_INIT(a,b,c)
+#endif
     const char* trial_locales[5];   /* 5 = 1 each for "", LC_ALL, LANG, "", C 
*/
     unsigned int trial_locales_count;
     const char * const lc_all     = savepv(PerlEnv_getenv("LC_ALL"));
@@ -680,6 +757,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
                                     *bad_lang_use_once
                                     && strNE("0", bad_lang_use_once)))));
     bool done = FALSE;
+    char * sl_result;   /* return from setlocale() */
+    char * locale_param;
 #ifdef WIN32
     /* In some systems you can find out the system default locale
      * and use that as the fallback locale. */
@@ -691,6 +770,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
 
 #ifndef LOCALE_ENVIRON_REQUIRED
     PERL_UNUSED_VAR(done);
+    PERL_UNUSED_VAR(locale_param);
 #else
 
     /*
@@ -700,52 +780,64 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
 
 #   ifdef LC_ALL
     if (lang) {
-       if (my_setlocale(LC_ALL, setlocale_init))
+       sl_result = my_setlocale(LC_ALL, setlocale_init);
+        DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result);
+       if (sl_result)
            done = TRUE;
        else
            setlocale_failure = TRUE;
     }
-    if (!setlocale_failure) {
+    if (! setlocale_failure) {
 #       ifdef USE_LOCALE_CTYPE
-       if (! (curctype =
-              my_setlocale(LC_CTYPE,
-                        (!done && (lang || PerlEnv_getenv("LC_CTYPE")))
-                                   ? setlocale_init : NULL)))
+        locale_param = (! done && (lang || PerlEnv_getenv("LC_CTYPE")))
+                       ? setlocale_init
+                       : NULL;
+       curctype = my_setlocale(LC_CTYPE, locale_param);
+        DEBUG_LOCALE_INIT(LC_CTYPE, locale_param, sl_result);
+       if (! curctype)
            setlocale_failure = TRUE;
        else
            curctype = savepv(curctype);
 #       endif /* USE_LOCALE_CTYPE */
 #       ifdef USE_LOCALE_COLLATE
-       if (! (curcoll =
-              my_setlocale(LC_COLLATE,
-                        (!done && (lang || PerlEnv_getenv("LC_COLLATE")))
-                                  ? setlocale_init : NULL)))
+        locale_param = (! done && (lang || PerlEnv_getenv("LC_COLLATE")))
+                       ? setlocale_init
+                       : NULL;
+       curcoll = my_setlocale(LC_COLLATE, locale_param);
+        DEBUG_LOCALE_INIT(LC_COLLATE, locale_param, sl_result);
+       if (! curcoll)
            setlocale_failure = TRUE;
        else
            curcoll = savepv(curcoll);
 #       endif /* USE_LOCALE_COLLATE */
 #       ifdef USE_LOCALE_NUMERIC
-       if (! (curnum =
-              my_setlocale(LC_NUMERIC,
-                        (!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
-                                 ? setlocale_init : NULL)))
+        locale_param = (! done && (lang || PerlEnv_getenv("LC_NUMERIC")))
+                       ? setlocale_init
+                       : NULL;
+       curnum = my_setlocale(LC_NUMERIC, locale_param);
+        DEBUG_LOCALE_INIT(LC_NUMERIC, locale_param, sl_result);
+       if (! curnum)
            setlocale_failure = TRUE;
        else
            curnum = savepv(curnum);
 #       endif /* USE_LOCALE_NUMERIC */
 #       ifdef USE_LOCALE_MESSAGES
-       if (! my_setlocale(LC_MESSAGES,
-                        (!done && (lang || PerlEnv_getenv("LC_MESSAGES")))
-                                 ? setlocale_init : NULL))
-        {
+        locale_param = (! done && (lang || PerlEnv_getenv("LC_MESSAGES")))
+                       ? setlocale_init
+                       : NULL;
+       sl_result = my_setlocale(LC_MESSAGES, locale_param);
+        DEBUG_LOCALE_INIT(LC_MESSAGES, locale_param, sl_result);
+       if (! sl_result)
            setlocale_failure = TRUE;
         }
 #       endif /* USE_LOCALE_MESSAGES */
 #       ifdef USE_LOCALE_MONETARY
-       if (! my_setlocale(LC_MONETARY,
-                        (!done && (lang || PerlEnv_getenv("LC_MONETARY")))
-                                 ? setlocale_init : NULL))
-        {
+        locale_param = (! done && (lang || PerlEnv_getenv("LC_MONETARY")))
+                       ? setlocale_init
+                       : NULL;
+       sl_result = my_setlocale(LC_MONETARY, locale_param);
+        DEBUG_LOCALE_INIT(LC_MONETARY, locale_param, sl_result);
+       if (! sl_result) {
            setlocale_failure = TRUE;
         }
 #       endif /* USE_LOCALE_MONETARY */
@@ -781,6 +873,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
                 /* Note that this may change the locale, but we are going to do
                  * that anyway just below */
                 system_default_locale = setlocale(LC_ALL, "");
+                DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale);
 
                 /* Skip if invalid or it's already on the list of locales to
                  * try */
@@ -800,7 +893,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
         }
 
 #ifdef LC_ALL
-        if (! my_setlocale(LC_ALL, trial_locale)) {
+        sl_result = my_setlocale(LC_ALL, trial_locale);
+        DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result);
+        if (! sl_result) {
             setlocale_failure = TRUE;
         }
         else {
@@ -818,31 +913,41 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
         if (!setlocale_failure) {
 #ifdef USE_LOCALE_CTYPE
             Safefree(curctype);
-            if (! (curctype = my_setlocale(LC_CTYPE, trial_locale)))
+            curctype = my_setlocale(LC_CTYPE, trial_locale);
+            DEBUG_LOCALE_INIT(LC_CTYPE, trial_locale, curctype);
+            if (! curctype)
                 setlocale_failure = TRUE;
             else
                 curctype = savepv(curctype);
 #endif /* USE_LOCALE_CTYPE */
 #ifdef USE_LOCALE_COLLATE
             Safefree(curcoll);
-            if (! (curcoll = my_setlocale(LC_COLLATE, trial_locale)))
+            curcoll = my_setlocale(LC_COLLATE, trial_locale);
+            DEBUG_LOCALE_INIT(LC_COLLATE, trial_locale, curcoll);
+            if (! curcoll)
                 setlocale_failure = TRUE;
             else
                 curcoll = savepv(curcoll);
 #endif /* USE_LOCALE_COLLATE */
 #ifdef USE_LOCALE_NUMERIC
             Safefree(curnum);
-            if (! (curnum = my_setlocale(LC_NUMERIC, trial_locale)))
+            curnum = my_setlocale(LC_NUMERIC, trial_locale);
+            DEBUG_LOCALE_INIT(LC_NUMERIC, trial_locale, curnum);
+            if (! curnum)
                 setlocale_failure = TRUE;
             else
                 curnum = savepv(curnum);
 #endif /* USE_LOCALE_NUMERIC */
 #ifdef USE_LOCALE_MESSAGES
-            if (! (my_setlocale(LC_MESSAGES, trial_locale)))
+            sl_result = my_setlocale(LC_MESSAGES, trial_locale);
+            DEBUG_LOCALE_INIT(LC_MESSAGES, trial_locale, sl_result);
+            if (! (sl_result))
                 setlocale_failure = TRUE;
 #endif /* USE_LOCALE_MESSAGES */
 #ifdef USE_LOCALE_MONETARY
-            if (! (my_setlocale(LC_MONETARY, trial_locale)))
+            sl_result = my_setlocale(LC_MONETARY, trial_locale);
+            DEBUG_LOCALE_INIT(LC_MONETARY, trial_locale, sl_result);
+            if (! (sl_result))
                 setlocale_failure = TRUE;
 #endif /* USE_LOCALE_MONETARY */
 
@@ -935,7 +1040,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
              * LANG, and the C locale.  We don't try the same locale twice, so
              * don't add to the list if already there.  (On POSIX systems, the
              * LC_ALL element will likely be a repeat of the 0th element "",
-             * but there's no harm done by doing it explicitly */
+             * but there's no harm done by doing it explicitly.
+             *
+             * Note that this tries the LC_ALL environment variable even on
+             * systems which have no LC_ALL locale setting.  This may or may
+             * not have been originally intentional, but there's no real need
+             * to change the behavior. */
             if (lc_all) {
                 for (j = 0; j < trial_locales_count; j++) {
                     if (strEQ(lc_all, trial_locales[j])) {
@@ -1000,14 +1110,17 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
 #ifdef USE_LOCALE_CTYPE
             Safefree(curctype);
             curctype = savepv(setlocale(LC_CTYPE, NULL));
+            DEBUG_LOCALE_INIT(LC_CTYPE, NULL, curctype);
 #endif /* USE_LOCALE_CTYPE */
 #ifdef USE_LOCALE_COLLATE
             Safefree(curcoll);
             curcoll = savepv(setlocale(LC_COLLATE, NULL));
+            DEBUG_LOCALE_INIT(LC_COLLATE, NULL, curcoll);
 #endif /* USE_LOCALE_COLLATE */
 #ifdef USE_LOCALE_NUMERIC
             Safefree(curnum);
             curnum = savepv(setlocale(LC_NUMERIC, NULL));
+            DEBUG_LOCALE_INIT(LC_NUMERIC, NULL, curnum);
 #endif /* USE_LOCALE_NUMERIC */
         }
 
@@ -1749,6 +1862,100 @@ Perl_sync_locale(pTHX)
 
 }
 
+#if defined(DEBUGGING) && defined(USE_LOCALE)
+
+char *
+Perl__setlocale_debug_string(const int category,        /* category number,
+                                                           like LC_ALL */
+                            const char* const locale,   /* locale name */
+
+                            /* return value from setlocale() when attempting to
+                             * set 'category' to 'locale' */
+                            const char* const retval)
+{
+    /* Returns a pointer to a NUL-terminated string in static storage with
+     * added text about the info passed in.  This is not thread safe and will
+     * be overwritten by the next call, so this should be used just to
+     * formulate a string to immediately print or savepv() on.
+     *
+     * Buffer overflow checking is done only after the fact (via an assert),
+     * because this is used only in DEBUGGING, and an attacker would have to
+     * control the start up of perl with the correct environment variable or
+     * command line option. */
+
+    static char ret[128] = "";
+
+    strcpy(ret, "setlocale(");
+
+    switch (category) {
+        default:
+            sprintf(ret, "%s? %d", ret, category);
+            break;
+#   ifdef LC_ALL
+        case LC_ALL:
+            strcat(ret, "LC_ALL");
+            break;
+#   endif
+#   ifdef LC_CTYPE
+        case LC_CTYPE:
+            strcat(ret, "LC_CTYPE");
+            break;
+#   endif
+#   ifdef LC_NUMERIC
+        case LC_NUMERIC:
+            strcat(ret, "LC_NUMERIC");
+            break;
+#   endif
+#   ifdef LC_COLLATE
+        case LC_COLLATE:
+            strcat(ret, "LC_COLLATE");
+            break;
+#   endif
+#   ifdef LC_TIME
+        case LC_TIME:
+            strcat(ret, "LC_TIME");
+            break;
+#   endif
+#   ifdef LC_MONETARY
+        case LC_MONETARY:
+            strcat(ret, "LC_MONETARY");
+            break;
+#   endif
+#   ifdef LC_MESSAGES
+        case LC_MESSAGES:
+            strcat(ret, "LC_MESSAGES");
+            break;
+#   endif
+    }
+
+    strcat(ret, ", ");
+
+    if (locale) {
+        strcat(ret, "\"");
+        strcat(ret, locale);
+        strcat(ret, "\"");
+    }
+    else {
+        strcat(ret, "NULL");
+    }
+
+    strcat(ret, ") returned ");
+
+    if (retval) {
+        strcat(ret, "\"");
+        strcat(ret, retval);
+        strcat(ret, "\"");
+    }
+    else {
+        strcat(ret, "NULL");
+    }
+
+    assert(strlen(ret) < sizeof(ret));
+
+    return ret;
+}
+
+#endif
 
 
 /*
diff --git a/makedef.pl b/makedef.pl
index 1fee334..d1adad0 100644
--- a/makedef.pl
+++ b/makedef.pl
@@ -253,6 +253,7 @@ unless ($define{'DEBUGGING'}) {
                    Perl_debstackptrs
                    Perl_pad_sv
                    Perl_pad_setsv
+                    Perl__setlocale_debug_string
                    Perl_set_padlist
                    Perl_hv_assert
                    PL_watchaddr
diff --git a/perl.h b/perl.h
index b040291..cb877a3 100644
--- a/perl.h
+++ b/perl.h
@@ -4055,6 +4055,7 @@ Gid_t getegid (void);
 #  define DEBUG_Xv_TEST_ (DEBUG_X_TEST_ && DEBUG_v_TEST_)
 #  define DEBUG_Uv_TEST_ (DEBUG_U_TEST_ && DEBUG_v_TEST_)
 #  define DEBUG_Pv_TEST_ (DEBUG_P_TEST_ && DEBUG_v_TEST_)
+#  define DEBUG_Lv_TEST_ (DEBUG_L_TEST_ && DEBUG_v_TEST_)
 
 #ifdef DEBUGGING
 
@@ -4088,6 +4089,7 @@ Gid_t getegid (void);
 #  define DEBUG_Xv_TEST DEBUG_Xv_TEST_
 #  define DEBUG_Uv_TEST DEBUG_Uv_TEST_
 #  define DEBUG_Pv_TEST DEBUG_Pv_TEST_
+#  define DEBUG_Lv_TEST DEBUG_Lv_TEST_
 
 #  define PERL_DEB(a)                  a
 #  define PERL_DEB2(a,b)               a
@@ -4127,6 +4129,7 @@ Gid_t getegid (void);
 #  define DEBUG_Xv(a) DEBUG__(DEBUG_Xv_TEST, a)
 #  define DEBUG_Uv(a) DEBUG__(DEBUG_Uv_TEST, a)
 #  define DEBUG_Pv(a) DEBUG__(DEBUG_Pv_TEST, a)
+#  define DEBUG_Lv(a) DEBUG__(DEBUG_Lv_TEST, a)
 
 #  define DEBUG_S(a) DEBUG__(DEBUG_S_TEST, a)
 #  define DEBUG_T(a) DEBUG__(DEBUG_T_TEST, a)
@@ -4171,6 +4174,7 @@ Gid_t getegid (void);
 #  define DEBUG_Xv_TEST (0)
 #  define DEBUG_Uv_TEST (0)
 #  define DEBUG_Pv_TEST (0)
+#  define DEBUG_Lv_TEST (0)
 
 #  define PERL_DEB(a)
 #  define PERL_DEB2(a,b)               b
@@ -4204,6 +4208,7 @@ Gid_t getegid (void);
 #  define DEBUG_Xv(a)
 #  define DEBUG_Uv(a)
 #  define DEBUG_Pv(a)
+#  define DEBUG_Lv(a)
 #endif /* DEBUGGING */
 
 
diff --git a/proto.h b/proto.h
index 0f4e7a0..4d3465f 100644
--- a/proto.h
+++ b/proto.h
@@ -3828,6 +3828,12 @@ STATIC int       S_tokereport(pTHX_ I32 rv, const 
YYSTYPE* lvalp);
 #define PERL_ARGS_ASSERT_TOKEREPORT    \
        assert(lvalp)
 #  endif
+#  if defined(USE_LOCALE)     && (defined(PERL_IN_LOCALE_C) || defined 
(PERL_EXT_POSIX))
+PERL_CALLCONV char *   Perl__setlocale_debug_string(const int category, const 
char* const locale, const char* const retval)
+                       __attribute__warn_unused_result__
+                       __attribute__pure__;
+
+#  endif
 #endif
 #if defined(DEBUG_LEAKING_SCALARS_FORK_DUMP)
 PERL_CALLCONV void     Perl_dump_sv_child(pTHX_ SV *sv);
diff --git a/regcomp.c b/regcomp.c
index ccbccf8..24af9d7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6794,25 +6794,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int 
pat_count,
                     || ! has_charset);
         bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN)
                                                    == REG_RUN_ON_COMMENT_SEEN);
-       U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
+       U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD)
                            >> RXf_PMf_STD_PMMOD_SHIFT);
        const char *fptr = STD_PAT_MODS;        /*"msixn"*/
        char *p;
-        /* Allocate for the worst case, which is all the std flags are turned
-         * on.  If more precision is desired, we could do a population count of
-         * the flags set.  This could be done with a small lookup table, or by
-         * shifting, masking and adding, or even, when available, assembly
-         * language for a machine-language population count.
-         * We never output a minus, as all those are defaults, so are
+
+        /* We output all the necessary flags; we never output a minus, as all
+         * those are defaults, so are
          * covered by the caret */
        const STRLEN wraplen = plen + has_p + has_runon
             + has_default       /* If needs a caret */
+            + PL_bitcount[reganch] /* 1 char for each set standard flag */
 
                /* If needs a character set specifier */
            + ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0)
-            + (sizeof(STD_PAT_MODS) - 1)
             + (sizeof("(?:)") - 1);
 
+        /* make sure PL_bitcount bounds not exceeded */
+        assert(sizeof(STD_PAT_MODS) <= 8);
+
         Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
        r->xpv_len_u.xpvlenu_pv = p;
        if (RExC_utf8)
diff --git a/t/loc_tools.pl b/t/loc_tools.pl
index 541e08f..86d8e48 100644
--- a/t/loc_tools.pl
+++ b/t/loc_tools.pl
@@ -80,20 +80,38 @@ sub _decode_encodings { # For use only by other functions 
in this file!
     return @enc;
 }
 
+# LC_ALL can be -1 on some platforms.  And, in fact the implementors could
+# legally use any integer to represent any category.  But it makes the most
+# sense for them to have used small integers.  Below, we create new locale
+# numbers for ones missing from this machine.  We make them very negative,
+# hopefully more negative than anything likely to be a valid category on the
+# platform, but also below is a check to be sure that our guess is valid.
+my $max_bad_category_number = -1000000;
+
 # Initialize this hash so that it looks like e.g.,
 #   6 => 'CTYPE',
 # where 6 is the value of &POSIX::LC_CTYPE
 my %category_name;
 eval { require POSIX; import POSIX 'locale_h'; };
 unless ($@) {
-    my $number_for_missing_category = 0;
+    my $number_for_missing_category = $max_bad_category_number;
     foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
         my $number = eval "&POSIX::LC_$name";
 
-        # Use a negative number if the platform doesn't support this category,
-        # so we have an entry for all ones that might be specified in calls to
-        # us.
-        $number = --$number_for_missing_category if $@;
+        if ($@) {
+            # Use a negative number (smaller than any legitimate category
+            # number) if the platform doesn't support this category, so we
+            # have an entry for all the ones that might be specified in calls
+            # to us.
+            $number = $number_for_missing_category-- if $@;
+        }
+        elsif (   $number !~ / ^ -? \d+ $ /x
+               || $number <=  $max_bad_category_number)
+        {
+            # We think this should be an int.  And it has to be larger than
+            # any of our synthetic numbers.
+            die "Unexpected locale category number '$number' for LC_$name"
+        }
 
         $category_name{$number} = "$name";
     }
@@ -129,19 +147,30 @@ sub locales_enabled(;$) {
     if (defined $categories_ref) {
         $categories_ref = [ $categories_ref ] if ! ref $categories_ref;
         my @local_categories_copy = @$categories_ref;
-        for my $category (@local_categories_copy) {
-            if ($category =~ / ^ -? \d+ $ /x) {
-                die "Invalid locale category number '$category'"
-                    unless grep { $category == $_ } keys %category_name;
-                $category = $category_name{$category};
+        for my $category_name_or_number (@local_categories_copy) {
+            my $name;
+            my $number;
+            if ($category_name_or_number =~ / ^ -? \d+ $ /x) {
+                $number = $category_name_or_number;
+                die "Invalid locale category number '$number'"
+                    unless grep { $number == $_ } keys %category_name;
+                $name = $category_name{$number};
             }
             else {
-                $category =~ s/ ^ LC_ //x;
-                die "Invalid locale category name '$category'"
-                    unless grep { $category eq $_ } values %category_name;
+                $name = $category_name_or_number;
+                $name =~ s/ ^ LC_ //x;
+                foreach my $trial (keys %category_name) {
+                    if ($category_name{$trial} eq $name) {
+                        $number = $trial;
+                        last;
+                    }
+                }
+                die "Invalid locale category name '$name'"
+                    unless defined $number;
             }
 
-            return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE_$category\b/;
+            return 0 if    $number <= $max_bad_category_number
+                        || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/;
         }
     }
 

--
Perl5 Master Repository

Reply via email to