Hi there, the following produces a segfault using the latest version of libwww.
It seems that HTML::Parser is marking non-UTF-8 strings as UTF-8 strings.

use HTML::TokeParser;
use LWP::Simple;
use URI::URL;

$data = get("http://www.aries.lu/site.php?section=movies");
my $tp = HTML::TokeParser->new(\$data);
while (my $token = $tp->get_token)
{
    my $ttype = shift @{ $token };
    if($ttype eq "S") # start tag?
    {
        my($tag, $attr, $attrseq, $rawtxt) = @{ $token };
        $tag = lc($tag);
        if($tag eq "a")
        {
            my $a_href = $attr->{'href'};
            my $a_encl = $tp->get_trimmed_text("/$tag");
            print "$a_href\n";
            $a_href = url($a_href, $docurl)->abs if ($a_href ne "");
        }
    }
}

Or, to see the problem directly:

#!/usr/bin/perl
use warnings;
use strict;
use Devel::Peek;
use HTML::Parser;

my $html = qq{<img title="’\260">};
my $p = HTML::Parser->new(api_version=>3,start_h=>[sub{Dump(shift->{title})}, "attr"]);
$p->parse($html);

Thibaut