Change 18490 by jhi@lyta on 2003/01/16 01:58:39

        Make the locale-induced UTF-8-ification of the standard
        filehandles (STDIN, STDOUT, STDERR) and of the default file
        open layer explicit (requested with either -C or
        PERL_UTF8_LOCALE), instead of implicit (and unasked-for).

Affected files ...

... //depot/perl/embedvar.h#157 edit
... //depot/perl/gv.c#179 edit
... //depot/perl/intrpvar.h#113 edit
... //depot/perl/locale.c#11 edit
... //depot/perl/mg.c#247 edit
... //depot/perl/perl.c#462 edit
... //depot/perl/perlapi.h#79 edit
... //depot/perl/pod/perlrun.pod#69 edit
... //depot/perl/pod/perlunicode.pod#114 edit
... //depot/perl/pod/perluniintro.pod#45 edit
... //depot/perl/pod/perlvar.pod#112 edit

Differences ...

==== //depot/perl/embedvar.h#157 (text+w) ====
Index: perl/embedvar.h
--- perl/embedvar.h#156~18355~  Fri Dec 27 18:05:14 2002
+++ perl/embedvar.h     Wed Jan 15 17:58:39 2003
@@ -413,10 +413,10 @@
 #define PL_utf8_toupper                (vTHX->Iutf8_toupper)
 #define PL_utf8_upper          (vTHX->Iutf8_upper)
 #define PL_utf8_xdigit         (vTHX->Iutf8_xdigit)
+#define PL_utf8locale          (vTHX->Iutf8locale)
 #define PL_uudmap              (vTHX->Iuudmap)
 #define PL_wantutf8            (vTHX->Iwantutf8)
 #define PL_warnhook            (vTHX->Iwarnhook)
-#define PL_widesyscalls                (vTHX->Iwidesyscalls)
 #define PL_xiv_arenaroot       (vTHX->Ixiv_arenaroot)
 #define PL_xiv_root            (vTHX->Ixiv_root)
 #define PL_xnv_arenaroot       (vTHX->Ixnv_arenaroot)
@@ -702,10 +702,10 @@
 #define PL_Iutf8_toupper       PL_utf8_toupper
 #define PL_Iutf8_upper         PL_utf8_upper
 #define PL_Iutf8_xdigit                PL_utf8_xdigit
+#define PL_Iutf8locale         PL_utf8locale
 #define PL_Iuudmap             PL_uudmap
 #define PL_Iwantutf8           PL_wantutf8
 #define PL_Iwarnhook           PL_warnhook
-#define PL_Iwidesyscalls       PL_widesyscalls
 #define PL_Ixiv_arenaroot      PL_xiv_arenaroot
 #define PL_Ixiv_root           PL_xiv_root
 #define PL_Ixnv_arenaroot      PL_xnv_arenaroot

==== //depot/perl/gv.c#179 (text) ====
Index: perl/gv.c
--- perl/gv.c#178~18456~        Tue Jan  7 01:20:22 2003
+++ perl/gv.c   Wed Jan 15 17:58:39 2003
@@ -974,9 +974,15 @@
             goto ro_magicalize;
         else
             break;
+    case '\025':
+        if (len > 1 && strNE(name, "\025TF8_LOCALE")) 
+           break;
+       goto ro_magicalize;
+
     case '\027':       /* $^W & $^WARNING_BITS */
-       if (len > 1 && strNE(name, "\027ARNING_BITS")
-           && strNE(name, "\027IDE_SYSTEM_CALLS"))
+       if (len > 1
+           && strNE(name, "\027ARNING_BITS")
+           )
            break;
        goto magicalize;
 
@@ -1793,10 +1799,13 @@
            goto yes;
        }
        break;
+    case '\025':
+        if (len > 1 && strEQ(name, "\025TF8_LOCALE"))
+           goto yes;
     case '\027':   /* $^W & $^WARNING_BITS */
        if (len == 1
            || (len == 12 && strEQ(name, "\027ARNING_BITS"))
-           || (len == 17 && strEQ(name, "\027IDE_SYSTEM_CALLS")))
+           )
        {
            goto yes;
        }

==== //depot/perl/intrpvar.h#113 (text) ====
Index: perl/intrpvar.h
--- perl/intrpvar.h#112~18203~  Thu Nov 28 06:54:34 2002
+++ perl/intrpvar.h     Wed Jan 15 17:58:39 2003
@@ -48,7 +48,7 @@
 */
 
 PERLVAR(Idowarn,       U8)
-PERLVAR(Iwidesyscalls, bool)           /* wide system calls */
+PERLVAR(Iutf8locale,   bool)           /* utf8 locale detected */
 PERLVAR(Idoextract,    bool)
 PERLVAR(Isawampersand, bool)           /* must save all match strings */
 PERLVAR(Iunsafe,       bool)

==== //depot/perl/locale.c#11 (text) ====
Index: perl/locale.c
--- perl/locale.c#10~18165~     Tue Nov 19 14:45:36 2002
+++ perl/locale.c       Wed Jan 15 17:58:39 2003
@@ -475,7 +475,7 @@
 
 #ifdef USE_PERLIO
     {
-      /* Set PL_wantutf8 to TRUE if using PerlIO _and_
+      /* Set PL_utf8locale to TRUE if using PerlIO _and_
         any of the following are true:
         - nl_langinfo(CODESET) contains /^utf-?8/i
         - $ENV{LC_ALL}   contains /^utf-?8/i
@@ -487,37 +487,44 @@
         it overrides LC_MESSAGES for GNU gettext, and it also
         can have more than one locale, separated by spaces,
         in case you need to know.)
-        If PL_wantutf8 is true, perl.c:S_parse_body()
-        will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
-        STDERR, _and_ the default open discipline.
+        If PL_utf8locale and PL_wantutf8 (set by -C or PERL_UTF8_LOCALE)
+        are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer
+        on STDIN, STDOUT, STDERR, _and_ the default open discipline.
       */
-        bool wantutf8 = FALSE;
+        bool utf8locale = FALSE;
         char *codeset = NULL;
 #if defined(HAS_NL_LANGINFO) && defined(CODESET)
         codeset = nl_langinfo(CODESET);
 #endif
         if (codeset)
-             wantutf8 = (ibcmp(codeset,  "UTF-8", 5) == 0 ||
-                         ibcmp(codeset,  "UTF8",  4) == 0);
+             utf8locale = (ibcmp(codeset,  "UTF-8", 5) == 0 ||
+                           ibcmp(codeset,  "UTF8",  4) == 0);
 #if defined(USE_LOCALE)
         else { /* nl_langinfo(CODESET) is supposed to correctly
                 * interpret the locale environment variables,
                 * but just in case it fails, let's do this manually. */ 
              if (lang)
-                  wantutf8 = (ibcmp(lang,     "UTF-8", 5) == 0 ||
-                              ibcmp(lang,     "UTF8",  4) == 0);
+                  utf8locale = (ibcmp(lang,     "UTF-8", 5) == 0 ||
+                                ibcmp(lang,     "UTF8",  4) == 0);
 #ifdef USE_LOCALE_CTYPE
              if (curctype)
-                  wantutf8 = (ibcmp(curctype,     "UTF-8", 5) == 0 ||
-                              ibcmp(curctype,     "UTF8",  4) == 0);
+                  utf8locale = (ibcmp(curctype,     "UTF-8", 5) == 0 ||
+                                ibcmp(curctype,     "UTF8",  4) == 0);
 #endif
              if (lc_all)
-                  wantutf8 = (ibcmp(lc_all,   "UTF-8", 5) == 0 ||
-                              ibcmp(lc_all,   "UTF8",  4) == 0);
-#endif /* USE_LOCALE */
+                  utf8locale = (ibcmp(lc_all,   "UTF-8", 5) == 0 ||
+                                ibcmp(lc_all,   "UTF8",  4) == 0);
         }
-        if (wantutf8)
-             PL_wantutf8 = TRUE;
+#endif /* USE_LOCALE */
+        if (utf8locale)
+             PL_utf8locale = TRUE;
+    }
+    /* Set PL_wantutf8 to $ENV{PERL_UTF8_LOCALE} if using PerlIO.
+       This is an alternative to using the -C command line switch
+       (the -C if present will override this). */
+    {
+        char *p = PerlEnv_getenv("PERL_UTF8_LOCALE");
+        PL_wantutf8 = p ? (bool) atoi(p) : FALSE;
     }
 #endif
 

==== //depot/perl/mg.c#247 (text) ====
Index: perl/mg.c
--- perl/mg.c#246~18453~        Mon Jan  6 12:31:43 2003
+++ perl/mg.c   Wed Jan 15 17:58:39 2003
@@ -662,7 +662,11 @@
                    ? (PL_taint_warn || PL_unsafe ? -1 : 1)
                    : 0);
         break;
-    case '\027':               /* ^W  & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+    case '\025':               /* $^UTF8_LOCALE */
+        if (strEQ(mg->mg_ptr, "\025TF8_LOCALE"))
+           sv_setiv(sv, (IV) (PL_wantutf8 && PL_utf8locale));
+        break;
+    case '\027':               /* ^W  & $^WARNING_BITS */
        if (*(mg->mg_ptr+1) == '\0')
            sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE));
        else if (strEQ(mg->mg_ptr+1, "ARNING_BITS")) {
@@ -679,8 +683,6 @@
            }
            SvPOK_only(sv);
        }
-       else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
-           sv_setiv(sv, (IV)PL_widesyscalls);
        break;
     case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9': case '&':
@@ -1925,7 +1927,13 @@
        PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
 #endif
        break;
-    case '\027':       /* ^W & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+    case '\025':       /* $^UTF8_LOCALE */
+        if (SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv))
+           PL_wantutf8 = PL_utf8locale;
+       else
+           PL_wantutf8 = FALSE;
+        break;
+    case '\027':       /* ^W & $^WARNING_BITS */
        if (*(mg->mg_ptr+1) == '\0') {
            if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
                i = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
@@ -1967,8 +1975,6 @@
                }
            }
        }
-       else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
-           PL_widesyscalls = (bool)SvTRUE(sv);
        break;
     case '.':
        if (PL_localizing) {

==== //depot/perl/perl.c#462 (text) ====
Index: perl/perl.c
--- perl/perl.c#461~18456~      Tue Jan  7 01:20:22 2003
+++ perl/perl.c Wed Jan 15 17:58:39 2003
@@ -1355,10 +1355,11 @@
     if (!PL_do_undump)
        init_postdump_symbols(argc,argv,env);
 
-    /* PL_wantutf8 is conditionally turned on by
+    /* PL_utf8locale is conditionally turned on by
      * locale.c:Perl_init_i18nl10n() if the environment
-     * look like the user wants to use UTF-8. */
-    if (PL_wantutf8) { /* Requires init_predump_symbols(). */
+     * looks like the user wants to use UTF-8.
+     * PL_wantutf8 is turned on by -C or by $ENV{PERL_UTF8_LOCALE}. */
+    if (PL_utf8locale && PL_wantutf8) { /* Requires init_predump_symbols(). */
         IO* io;
         PerlIO* fp;
         SV* sv;
@@ -2156,7 +2157,7 @@
        return s + numlen;
     }
     case 'C':
-       PL_widesyscalls = TRUE;
+        PL_wantutf8 = TRUE; /* Can be set earlier by $ENV{PERL_UTF8_LOCALE}. */
        s++;
        return s;
     case 'F':
@@ -3397,7 +3398,7 @@
        for (; argc > 0; argc--,argv++) {
            SV *sv = newSVpv(argv[0],0);
            av_push(GvAVn(PL_argvgv),sv);
-           if (PL_widesyscalls)
+           if (PL_wantutf8)
                (void)sv_utf8_decode(sv);
        }
     }

==== //depot/perl/perlapi.h#79 (text+w) ====
Index: perl/perlapi.h
--- perl/perlapi.h#78~18355~    Fri Dec 27 18:05:14 2002
+++ perl/perlapi.h      Wed Jan 15 17:58:39 2003
@@ -584,14 +584,14 @@
 #define PL_utf8_upper          (*Perl_Iutf8_upper_ptr(aTHX))
 #undef  PL_utf8_xdigit
 #define PL_utf8_xdigit         (*Perl_Iutf8_xdigit_ptr(aTHX))
+#undef  PL_utf8locale
+#define PL_utf8locale          (*Perl_Iutf8locale_ptr(aTHX))
 #undef  PL_uudmap
 #define PL_uudmap              (*Perl_Iuudmap_ptr(aTHX))
 #undef  PL_wantutf8
 #define PL_wantutf8            (*Perl_Iwantutf8_ptr(aTHX))
 #undef  PL_warnhook
 #define PL_warnhook            (*Perl_Iwarnhook_ptr(aTHX))
-#undef  PL_widesyscalls
-#define PL_widesyscalls                (*Perl_Iwidesyscalls_ptr(aTHX))
 #undef  PL_xiv_arenaroot
 #define PL_xiv_arenaroot       (*Perl_Ixiv_arenaroot_ptr(aTHX))
 #undef  PL_xiv_root

==== //depot/perl/pod/perlrun.pod#69 (text) ====
Index: perl/pod/perlrun.pod
--- perl/pod/perlrun.pod#68~18489~      Wed Jan 15 12:55:00 2003
+++ perl/pod/perlrun.pod        Wed Jan 15 17:58:39 2003
@@ -266,11 +266,21 @@
 
 =item B<-C>
 
-enables Perl to use the native wide character APIs on the target system.
-The magic variable C<${^WIDE_SYSTEM_CALLS}> reflects the state of
-this switch.  See L<perlvar/"${^WIDE_SYSTEM_CALLS}">.
+enables Perl to use the Unicode APIs on the target system.
 
-This feature is currently only implemented on the Win32 platform.
+As of Perl 5.8.1, if C<-C> is used and the locale settings (the LC_ALL,
+LC_CTYPE, and LANG environment variables) indicate a UTF-8 locale,
+STDIN, STDOUT, and STDERR are all expected to be in UTF-8,
+and C<:utf8> becomes the default file open layer.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more information.
+The magic variable C<${^UTF8_LOCALE}> reflects this state,
+see L<perlvar/"${^UTF8_LOCALE}">.  (Another way of setting this
+variable is to set the environment variable PERL_UTF8_LOCALE.)
+
+(In Perls earlier than 5.8.1 the C<-C> switch was a Win32-only switch
+that enabled the use of Unicode-aware "wide system call" Win32 APIs.
+This feature was practically unused, however, and the command line
+switch was therefore "recycled".)
 
 =item B<-c>
 

==== //depot/perl/pod/perlunicode.pod#114 (text) ====
Index: perl/pod/perlunicode.pod
--- perl/pod/perlunicode.pod#113~18280~ Tue Dec 10 13:30:10 2002
+++ perl/pod/perlunicode.pod    Wed Jan 15 17:58:39 2003
@@ -67,13 +67,6 @@
 external programs, from information provided by the system (such as %ENV),
 or from literals and constants in the source text.
 
-On Windows platforms, if the C<-C> command line switch is used or the
-${^WIDE_SYSTEM_CALLS} global flag is set to C<1>, all system calls
-will use the corresponding wide-character APIs.  This feature is
-available only on Windows to conform to the API standard already
-established for that platform--and there are very few non-Windows
-platforms that have Unicode-aware APIs.
-
 The C<bytes> pragma will always, regardless of platform, force byte
 semantics in a particular lexical scope.  See L<bytes>.
 
@@ -1050,10 +1043,14 @@
 
 =item *
 
-If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
-contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
-the default encodings of your STDIN, STDOUT, and STDERR, and of
-B<any subsequent file open>, are considered to be UTF-8.
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or setting the PERL_UTF8_LOCALE environment variable to a true
+value, then the default encodings of your STDIN, STDOUT, and STDERR,
+and of B<any subsequent file open>, are considered to be UTF-8.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more
+information.  The magic variable C<${^UTF8_LOCALE}> will also be set.
 
 =item *
 
@@ -1410,6 +1407,6 @@
 =head1 SEE ALSO
 
 L<perluniintro>, L<encoding>, L<Encode>, L<open>, L<utf8>, L<bytes>,
-L<perlretut>, L<perlvar/"${^WIDE_SYSTEM_CALLS}">
+L<perlretut>, L<perlvar/"${^UTF8_LOCALE}">
 
 =cut

==== //depot/perl/pod/perluniintro.pod#45 (text) ====
Index: perl/pod/perluniintro.pod
--- perl/pod/perluniintro.pod#44~18051~ Wed Oct 23 11:56:08 2002
+++ perl/pod/perluniintro.pod   Wed Jan 15 17:58:39 2003
@@ -172,13 +172,15 @@
 to this sample program ensures that the output is completely UTF-8,
 and removes the program's warning.
 
-If your locale environment variables (C<LANGUAGE>, C<LC_ALL>,
-C<LC_CTYPE>, C<LANG>) contain the strings 'UTF-8' or 'UTF8',
-regardless of case, then the default encoding of your STDIN, STDOUT,
-and STDERR and of B<any subsequent file open>, is UTF-8.  Note that
-this means that Perl expects other software to work, too: if Perl has
-been led to believe that STDIN should be UTF-8, but then STDIN coming
-in from another command is not UTF-8, Perl will complain about the
+If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or by setting the PERL_UTF8_LOCALE environment variable to
+a true value, then the default encoding of your STDIN, STDOUT, and
+STDERR, and of B<any subsequent file open>, is UTF-8.  Note that this
+means that Perl expects other software to work, too: if Perl has been
+led to believe that STDIN should be UTF-8, but then STDIN coming in
+from another command is not UTF-8, Perl will complain about the
 malformed UTF-8.
 
 All features that combine Unicode and I/O also require using the new

==== //depot/perl/pod/perlvar.pod#112 (text) ====
Index: perl/pod/perlvar.pod
--- perl/pod/perlvar.pod#111~18467~     Thu Jan  9 13:03:07 2003
+++ perl/pod/perlvar.pod        Wed Jan 15 17:58:39 2003
@@ -1109,6 +1109,16 @@
 B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with
 B<-t> or B<-TU>).  This variable is read-only.
 
+=item ${^UTF8_LOCALE}
+
+Reflects whether the locale settings indicate the use of UTF-8 B<and>
+the use of UTF-8 was enabled either by the C<-C> command line switch or
+by setting the PERL_UTF8_LOCALE environment variable to a true value.
+This variable is read-only.  If true, STDIN is expected to be in
+UTF-8, STDOUT and STDERR are in UTF-8, and C<:utf8> is the default
+file open layer.  See L<perluniintro>, L<perlfunc/open>, and L<open>
+for more information.
+
 =item $PERL_VERSION
 
 =item $^V
@@ -1147,21 +1157,6 @@
 
 The current set of warning checks enabled by the C<use warnings> pragma.
 See the documentation of C<warnings> for more details.
-
-=item ${^WIDE_SYSTEM_CALLS}
-
-Global flag that enables system calls made by Perl to use wide character
-APIs native to the system, if available.  This is currently only implemented
-on the Windows platform.
-
-This can also be enabled from the command line using the C<-C> switch.
-
-The initial value is typically C<0> for compatibility with Perl versions
-earlier than 5.6, but may be automatically set to C<1> by Perl if the system
-provides a user-settable default (e.g., C<$ENV{LC_CTYPE}>).
-
-The C<bytes> pragma always overrides the effect of this flag in the current
-lexical scope.  See L<bytes>.
 
 =item $EXECUTABLE_NAME
 
End of Patch.

Reply via email to