I'm not sure this works perfectly, but the test makes sure it doesn't
emit warnings on UTF-8 web pages, even with the parse_head option on.

=== lib/LWP/Protocol.pm
==================================================================
--- lib/LWP/Protocol.pm (revision 5525)
+++ lib/LWP/Protocol.pm (local)
@@ -103,7 +103,18 @@
    my $parser;
    if ($parse_head && $response->content_type eq 'text/html') {
       require HTML::HeadParser;
-       $parser = HTML::HeadParser->new($response->{'_headers'});
+        require HTTP::Headers::Util;
+        $parser = HTML::HeadParser->new($response->{'_headers'});
+
+        # Put the head parser in UTF-8 mode when the response declares a UTF-8
+        # charset. Charset names are case-insensitive, so compare lowercased.
+        if (my @ct = HTTP::Headers::Util::split_header_words($response->header("Content-Type"))) {
+            my(undef, undef, %ct_param) = @{$ct[-1]};
+            if ($ct_param{charset} && lc($ct_param{charset}) eq 'utf-8') {
+                $parser->utf8_mode(1);
+            }
+        }
+
    }
    my $content_size = 0;

=== t/live/utf8.t
==================================================================
--- t/live/utf8.t       (revision 5525)
+++ t/live/utf8.t       (local)
@@ -0,0 +1,18 @@
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+use LWP::UserAgent;
+
+# Accumulate every warning raised during the fetch so it can be asserted on.
+my $warnings = '';
+local $SIG{__WARN__} = sub { $warnings .= "@_" };
+
+# Fetch a live UTF-8 page with head parsing on, the code path under test.
+my $url = "http://ja.wikipedia.org/wiki/%E3%83%A1%E3%82%A4%E3%83%B3%E3%83%9A%E3%83%BC%E3%82%B8";
+my $ua  = LWP::UserAgent->new(parse_head => 1);
+my $res = $ua->get($url);
+
+# Charset names are case-insensitive: accept "UTF-8" as well as "utf-8".
+like $res->header('Content-Type'), qr/charset=utf-8/i, "Content-Type is UTF-8";
+is $warnings, '', "No warnings";


--
Tatsuhiko Miyagawa

Reply via email to