In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/613c63b465f01af4e535fdc6c1c17e7470be5aad?hp=7a4d6ad6921760cfbf05a181861e2cddaf121a45>
- Log ----------------------------------------------------------------- commit 613c63b465f01af4e535fdc6c1c17e7470be5aad Author: Chip Salzenberg <[email protected]> Date: Wed Jul 25 20:27:30 2012 -0700 When setting environment variables via %ENV, force values to be strings only (turning off other OK flags), make them byte strings; if wide characters can't be downgraded to bytes, leave the string utf8 and issue a warning. M mg.c M pod/perldelta.pod M t/op/magic.t commit 8bb025ae770b0414ade11bcc76d6cce7de221857 Author: Chip Salzenberg <[email protected]> Date: Wed Jul 25 18:41:50 2012 -0700 Make all the SvPV*force* macros always return sv with SvPOK_only, as API docs always claimed they did. Also update those docs to be clearer. M sv.c M sv.h ----------------------------------------------------------------------- Summary of changes: mg.c | 15 +++++++++-- pod/perldelta.pod | 27 ++++++++++++++++--- sv.c | 1 + sv.h | 29 +++++++++++++-------- t/op/magic.t | 71 ++++++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 115 insertions(+), 28 deletions(-) diff --git a/mg.c b/mg.c index 2705109..f4979f1 100644 --- a/mg.c +++ b/mg.c @@ -1165,13 +1165,22 @@ Perl_magic_setenv(pTHX_ SV *sv, MAGIC *mg) dVAR; STRLEN len = 0, klen; const char * const key = MgPV_const(mg,klen); - const char *s = ""; + const char *s = NULL; PERL_ARGS_ASSERT_MAGIC_SETENV; + SvGETMAGIC(sv); if (SvOK(sv)) { - s = SvPV_const(sv,len); - SvPOK_only(sv); /* environment variables are strings, period */ + /* defined environment variables are byte strings; unfortunately + there is no SvPVbyte_force_nomg(), so we must do this piecewise */ + (void)SvPV_force_nomg_nolen(sv); + sv_utf8_downgrade(sv, /* fail_ok */ TRUE); + if (SvUTF8(sv)) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "Wide character in %s", "setenv"); + SvUTF8_off(sv); + } + s = SvPVX(sv); + len = SvCUR(sv); } my_setenv(key, s); /* does the deed */ diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 27ab286..eb4bc06 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -38,13 +38,21 @@ L</Selected Bug Fixes> section. =head1 Incompatible Changes -XXX For a release on a stable branch, this section aspires to be: +[ List each incompatible change as a =head2 entry ] - There are no changes intentionally incompatible with 5.XXX.XXX - If any exist, they are bugs, and we request that you submit a - report. See L</Reporting Bugs> below. +=head2 C<$ENV{foo} = undef> deletes value from environ, like C<delete $ENV{foo}> -[ List each incompatible change as a =head2 entry ] +This facilitates use of C<local()> with C<%ENV> entries. In previous +versions of Perl, C<undef> was converted to the empty string. + +=head2 Defined values stored in environment are forced to byte strings + +A value stored in an environment variable has always been stringified. In +this release, it is converted to be only a byte string. First, it is forced +to be a only a string. Then if the string is utf8 and the equivalent of +C<utf8::downgrade> works, that result is used; otherwise, the equivalent of +C<utf8::encode> is used, and a warning is issued about wide characters +(L</Diagnostics>). =head1 Deprecations @@ -164,6 +172,15 @@ include any changes in L<perldiag> that reconcile it to the C<C> code. XXX Newly added diagnostic messages go here +=over 4 + +=item * + +Attempts to put wide characters into environment variables via %ENV provoke +the warning "Wide character in setenv". + +=back + =head3 New Errors =over 4 diff --git a/sv.c b/sv.c index 3176ec0..7022ce1 100644 --- a/sv.c +++ b/sv.c @@ -9098,6 +9098,7 @@ Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) PTR2UV(sv),SvPVX_const(sv))); } } + (void)SvPOK_only_UTF8(sv); return SvPVX_mutable(sv); } diff --git a/sv.h b/sv.h index 882ba92..291ef3d 100644 --- a/sv.h +++ b/sv.h @@ -924,6 +924,13 @@ in gv.h: */ #define SvPOK_byte_nog(sv) ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVs_GMG)) == SVf_POK) #define SvPOK_byte_nogthink(sv) ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVf_THINKFIRST|SVs_GMG)) == SVf_POK) +#define SvPOK_pure_nogthink(sv) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP|SVf_THINKFIRST|SVs_GMG)) == SVf_POK) +#define SvPOK_utf8_pure_nogthink(sv) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP|SVf_THINKFIRST|SVs_GMG)) == (SVf_POK|SVf_UTF8)) +#define SvPOK_byte_pure_nogthink(sv) \ + ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8|SVf_IOK|SVf_NOK|SVf_ROK|SVpgv_GP|SVf_THINKFIRST|SVs_GMG)) == SVf_POK) + /* =for apidoc Am|U32|SvGAMAGIC|SV* sv @@ -1440,14 +1447,14 @@ attention to precisely which outputs are influenced by which inputs. /* =for apidoc Am|char*|SvPV_force|SV* sv|STRLEN len -Like C<SvPV> but will force the SV into containing just a string -(C<SvPOK_only>). You want force if you are going to update the C<SvPVX> -directly. +Like C<SvPV> but will force the SV into containing a string (C<SvPOK>), and +only a (C<SvPOK_only>), by hook or by crook. You want force if you are +going to update the C<SvPVX> directly. Processes get magic. =for apidoc Am|char*|SvPV_force_nomg|SV* sv|STRLEN len -Like C<SvPV> but will force the SV into containing just a string -(C<SvPOK_only>). You want force if you are going to update the C<SvPVX> -directly. Doesn't process magic. +Like C<SvPV> but will force the SV into containing a string (C<SvPOK>), and +only a (C<SvPOK_only>), by hook or by crook. You want force if you are +going to update the C<SvPVX> directly. Doesn't process get magic. =for apidoc Am|char*|SvPV|SV* sv|STRLEN len Returns a pointer to the string in the SV, or a stringified form of @@ -1625,15 +1632,15 @@ Like sv_utf8_upgrade, but doesn't do magic on C<sv>. #define SvPV_force_nomg_nolen(sv) SvPV_force_flags_nolen(sv, 0) #define SvPV_force_flags(sv, lp, flags) \ - (SvPOK_nogthink(sv) \ + (SvPOK_pure_nogthink(sv) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvn_force_flags(sv, &lp, flags)) #define SvPV_force_flags_nolen(sv, flags) \ - (SvPOK_nogthink(sv) \ + (SvPOK_pure_nogthink(sv) \ ? SvPVX(sv) : sv_pvn_force_flags(sv, 0, flags)) #define SvPV_force_flags_mutable(sv, lp, flags) \ - (SvPOK_nogthink(sv) \ + (SvPOK_pure_nogthink(sv) \ ? ((lp = SvCUR(sv)), SvPVX_mutable(sv)) \ : sv_pvn_force_flags(sv, &lp, flags|SV_MUTABLE_RETURN)) @@ -1660,7 +1667,7 @@ Like sv_utf8_upgrade, but doesn't do magic on C<sv>. ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pvutf8(sv, &lp)) #define SvPVutf8_force(sv, lp) \ - (SvPOK_utf8_nogthink(sv) \ + (SvPOK_utf8_pure_nogthink(sv) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvutf8n_force(sv, &lp)) #define SvPVutf8_nolen(sv) \ @@ -1674,7 +1681,7 @@ Like sv_utf8_upgrade, but doesn't do magic on C<sv>. ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pvbyte(sv, &lp)) #define SvPVbyte_force(sv, lp) \ - (SvPOK_byte_nogthink(sv) \ + (SvPOK_byte_pure_nogthink(sv) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvbyten_force(sv, &lp)) #define SvPVbyte_nolen(sv) \ diff --git a/t/op/magic.t b/t/op/magic.t index 643eeb6..c6c796d 100644 --- a/t/op/magic.t +++ b/t/op/magic.t @@ -5,7 +5,7 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; require './test.pl'; - plan (tests => 156); + plan (tests => 171); } # Test that defined() returns true for magic variables created on the fly, @@ -63,6 +63,17 @@ $PERL = $ENV{PERL} $Is_MSWin32 ? '.\perl' : './perl'); +sub env_is { + my ($key, $val, $desc) = @_; + if ($Is_MSWin32) { + # cmd.exe will echo 'variable=value' but 4nt will echo just the value + # -- Nikola Knezevic + like `set $key`, qr/^(?:\Q$key\E=)?\Q$val\E$/, $desc; + } else { + is `echo \$\Q$key\E`, "$val\n", $desc; + } +} + END { # On VMS, environment variable changes are peristent after perl exits delete $ENV{'FOO'} if $Is_VMS; @@ -604,15 +615,57 @@ SKIP: { } } - $ENV{__NoNeSuCh} = "foo"; - $0 = "bar"; -# cmd.exe will echo 'variable=value' but 4nt will echo just the value -# -- Nikola Knezevic - if ($Is_MSWin32) { - like `set __NoNeSuCh`, qr/^(?:__NoNeSuCh=)?foo$/; - } else { - is `echo \$__NoNeSuCh`, "foo\n"; + $ENV{__NoNeSuCh} = 'foo'; + $0 = 'bar'; + env_is(__NoNeSuCh => 'foo', 'setting $0 does not break %ENV'); + + # stringify a glob + $ENV{foo} = *TODO; + env_is(foo => '*main::TODO', 'ENV store of stringified glob'); + + # stringify a ref + my $ref = []; + $ENV{foo} = $ref; + env_is(foo => "$ref", 'ENV store of stringified ref'); + + # downgrade utf8 when possible + $bytes = "eh zero \x{A0}"; + utf8::upgrade($chars = $bytes); + $forced = $ENV{foo} = $chars; + ok(!utf8::is_utf8($forced) && $forced eq $bytes, 'ENV store downgrades utf8 in SV'); + env_is(foo => $bytes, 'ENV store downgrades utf8 in setenv'); + + # warn when downgrading utf8 is not possible + $chars = "X-Day \x{1998}"; + utf8::encode($bytes = $chars); + { + my $warned = 0; + local $SIG{__WARN__} = sub { ++$warned if $_[0] =~ /^Wide character in setenv/; print "# @_" }; + $forced = $ENV{foo} = $chars; + ok($warned == 1, 'ENV store warns about wide characters'); } + ok(!utf8::is_utf8($forced) && $forced eq $bytes, 'ENV store encodes high utf8 in SV'); + env_is(foo => $bytes, 'ENV store encodes high utf8 in SV'); + + # test local $ENV{foo} on existing foo + { + local $ENV{__NoNeSuCh}; + { local $TODO = 'exists on %ENV should reflect real env'; + ok(!exists $ENV{__NoNeSuCh}, 'not exists $ENV{existing} during local $ENV{existing}'); } + env_is(__NoNeLoCaL => ''); + } + ok(exists $ENV{__NoNeSuCh}, 'exists $ENV{existing} after local $ENV{existing}'); + env_is(__NoNeSuCh => 'foo'); + + # test local $ENV{foo} on new foo + { + local $ENV{__NoNeLoCaL} = 'foo'; + ok(exists $ENV{__NoNeLoCaL}, 'exists $ENV{new} during local $ENV{new}'); + env_is(__NoNeLoCaL => 'foo'); + } + ok(!exists $ENV{__NoNeLoCaL}, 'not exists $ENV{new} after local $ENV{new}'); + env_is(__NoNeLoCaL => ''); + SKIP: { skip("\$0 check only on Linux and FreeBSD", 2) unless $^O =~ /^(linux|freebsd)$/ -- Perl5 Master Repository
