But when running a robot, LWP::Protocol emits a warning while it reads the response, because the content stream is passed to the parser without first being decoded into Perl's internal character representation.
One possible fix is:
@@ -109,8 +110,14 @@
if (!defined($arg) || !$response->is_success) {
+ my $encoding;
+ if ($response->header("Content-Type") && $response->header("Content-Type") =~ m/;\s*charset\s*=\s*(\S+)\s*$/i) {
+ $encoding = $1;
+ } else {
+ $encoding = "iso-8859-1";
+ }
# scalar
while ($content = &$collector, length $$content) {
if ($parser) {
- $parser->parse($$content) or undef($parser);
+ $parser->parse(decode($encoding,$$content)) or undef($parser);
}
LWP::Debug::debug("read " . length($$content) . " bytes");
I have attached this as a patch and a unified diff, which you may use as you see fit.
-- K
--- /usr/lib/perl5/vendor_perl/5.8.3/LWP/Protocol.pm 2004-04-09 11:36:52.000000000 -0400
+++ Protocol.pm 2005-01-03 16:58:23.000000000 -0500
@@ -9,4 +9,5 @@
 use strict;
 use Carp ();
+use Encode;
 use HTTP::Status ();
 use HTTP::Response;
@@ -109,8 +110,14 @@
 if (!defined($arg) || !$response->is_success) {
+ my $encoding;
+ if ($response->header("Content-Type") && $response->header("Content-Type") =~ m/;\s*charset\s*=\s*(\S+)\s*$/i) {
+ $encoding = $1;
+ } else {
+ $encoding = "iso-8859-1";
+ }
 # scalar
 while ($content = &$collector, length $$content) {
 if ($parser) {
- $parser->parse($$content) or undef($parser);
+ $parser->parse(decode($encoding,$$content)) or undef($parser);
 }
 LWP::Debug::debug("read " . length($$content) . " bytes");
10a11
> use Encode;
110a112,117
> my $encoding;
> if ($response->header("Content-Type") && $response->header("Content-Type") =~ m/;\s*charset\s*=\s*(\S+)\s*$/i) {
> $encoding = $1;
> } else {
> $encoding = "iso-8859-1";
> }
114c121
< $parser->parse($$content) or undef($parser);
---
> $parser->parse(decode($encoding,$$content)) or undef($parser);