Change 18165 by rgs@rgs-home on 2002/11/19 22:45:36

        Subject: Re: [PATCH] UTF-8 enabling via locale (was: Re: Redhat 8 issue?)
        From: Jarkko Hietaniemi <[EMAIL PROTECTED]>
        Date: Sun, 3 Nov 2002 17:50:08 +0200
        Message-ID: <[EMAIL PROTECTED]>

Affected files ...

.... //depot/perl/lib/open.pm#40 edit
.... //depot/perl/locale.c#10 edit
.... //depot/perl/pod/perl58delta.pod#6 edit

Differences ...

==== //depot/perl/lib/open.pm#40 (text) ====
Index: perl/lib/open.pm
--- perl/lib/open.pm#39~17410~  Sun Jul  7 13:31:37 2002
+++ perl/lib/open.pm    Tue Nov 19 14:45:36 2002
@@ -27,6 +27,7 @@
            } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
                ($country_language, $locale_encoding) = ($1, $2);
            }
+           # LANGUAGE affects only LC_MESSAGES, and only on glibc
        } elsif (not $locale_encoding) {
            if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
                $ENV{LANG}   =~ /\butf-?8\b/i) {
@@ -250,7 +251,7 @@
 
 =back
 
-If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
 contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
 the default encoding of your STDIN, STDOUT, and STDERR, and of
 B<any subsequent file open>, is UTF-8.

==== //depot/perl/locale.c#10 (text) ====
Index: perl/locale.c
--- perl/locale.c#9~17181~      Mon Jun 10 20:27:24 2002
+++ perl/locale.c       Tue Nov 19 14:45:36 2002
@@ -478,10 +478,15 @@
       /* Set PL_wantutf8 to TRUE if using PerlIO _and_
         any of the following are true:
         - nl_langinfo(CODESET) contains /^utf-?8/i
-        - $ENV{LANGUAGE} contains /^utf-?8/i (only if using glibc)
-        - $ENV{LC_CALL} contains /^utf-?8/i
+        - $ENV{LC_ALL}   contains /^utf-?8/i
         - $ENV{LC_CTYPE} contains /^utf-?8/i
-        - $ENV{LANG} contains /^utf-?8/i
+        - $ENV{LANG}     contains /^utf-?8/i
+        LC_ALL, LC_CTYPE, and LANG obey the usual override
+        hierarchy of locale environment variables.  (LANGUAGE
+        affects only LC_MESSAGES, and only under glibc.  If present,
+        it overrides LC_MESSAGES for GNU gettext, and it also
+        can have more than one locale, separated by colons,
+        in case you need to know.)
         If PL_wantutf8 is true, perl.c:S_parse_body()
         will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
         STDERR, _and_ the default open discipline.
@@ -491,32 +496,26 @@
 #if defined(HAS_NL_LANGINFO) && defined(CODESET)
         codeset = nl_langinfo(CODESET);
 #endif
-        if (codeset &&
-            (ibcmp(codeset,  "UTF-8", 5) == 0 ||
-             ibcmp(codeset,  "UTF8",  4) == 0))
-             wantutf8 = TRUE;
+        if (codeset)
+             wantutf8 = (ibcmp(codeset,  "UTF-8", 5) == 0 ||
+                         ibcmp(codeset,  "UTF8",  4) == 0);
 #if defined(USE_LOCALE)
-#ifdef __GLIBC__
-        if (!wantutf8 && language &&
-            (ibcmp(language, "UTF-8", 5) == 0 ||
-             ibcmp(language, "UTF8",  4) == 0))
-             wantutf8 = TRUE;
-#endif
-        if (!wantutf8 && lc_all &&
-            (ibcmp(lc_all,   "UTF-8", 5) == 0 ||
-             ibcmp(lc_all,   "UTF8",  4) == 0))
-             wantutf8 = TRUE;
+        else { /* nl_langinfo(CODESET) is supposed to correctly
+                * interpret the locale environment variables,
+                * but just in case it fails, let's do this manually. */ 
+             if (lang)
+                  wantutf8 = (ibcmp(lang,     "UTF-8", 5) == 0 ||
+                              ibcmp(lang,     "UTF8",  4) == 0);
 #ifdef USE_LOCALE_CTYPE
-        if (!wantutf8 && curctype &&
-            (ibcmp(curctype,     "UTF-8", 5) == 0 ||
-             ibcmp(curctype,     "UTF8",  4) == 0))
-             wantutf8 = TRUE;
+             if (curctype)
+                  wantutf8 = (ibcmp(curctype,     "UTF-8", 5) == 0 ||
+                              ibcmp(curctype,     "UTF8",  4) == 0);
 #endif
-        if (!wantutf8 && lang &&
-            (ibcmp(lang,     "UTF-8", 5) == 0 ||
-             ibcmp(lang,     "UTF8",  4) == 0))
-             wantutf8 = TRUE;
+             if (lc_all)
+                  wantutf8 = (ibcmp(lc_all,   "UTF-8", 5) == 0 ||
+                              ibcmp(lc_all,   "UTF8",  4) == 0);
 #endif /* USE_LOCALE */
+        }
         if (wantutf8)
              PL_wantutf8 = TRUE;
     }

==== //depot/perl/pod/perl58delta.pod#6 (text) ====
Index: perl/pod/perl58delta.pod
--- perl/pod/perl58delta.pod#5~18147~   Sat Nov 16 11:52:54 2002
+++ perl/pod/perl58delta.pod    Tue Nov 19 14:45:36 2002
@@ -415,12 +415,12 @@
 
 =item *
 
-If your environment variables (LC_ALL, LC_CTYPE, LANG, LANGUAGE) look
-like you want to use UTF-8 (any of the the variables match C</utf-?8/i>),
-your STDIN, STDOUT, STDERR handles and the default open layer
-(see L<open>) are marked as UTF-8.  (This feature, like other new
-features that combine Unicode and I/O, work only if you are using
-PerlIO, but that's the default.)
+If your environment variables (LC_ALL, LC_CTYPE, LANG) look like you
+want to use UTF-8 (any of the variables match C</utf-?8/i>), your
+STDIN, STDOUT, STDERR handles and the default open layer (see L<open>)
+are marked as UTF-8.  (This feature, like other new features that
+combine Unicode and I/O, works only if you are using PerlIO, but that's
+the default.)
 
 Note that after this Perl really does assume that everything is UTF-8:
 for example if some input handle is not, Perl will probably very soon
End of Patch.

Reply via email to