"cpaul" <[EMAIL PROTECTED]> writes:
> > use HTML::PullParser;
> > use Data::Dump qw(dump);
> >
> > $doc = <<'EOT';
> > <TITLE>Foo</TITLE>
> > <script>
> > <foo>
> > </script>
> >
> > <h1>Hi</h1>
> > EOT
> >
> > my $p = HTML::PullParser->new(doc => $doc,
> > start => 'event,tagname',
> > end => 'event,tagname',
> > ignore_elements => ['script'],
> > );
> >
> > while (my $t = $p->get_token) {
> > print dump($t), "\n";
> > }
> > ---------------------------------------------------------------------
> > This will produce:
> >
> > ["start", "title"]
> > ["end", "title"]
> > ["start", "h1"]
> > ["end", "h1"]
>
>
> how to access the "Hi" within <h1> tags?
You need to ask the parser to report text as well, by passing a "text"
argspec in addition to "start" and "end". This is an expanded sample:
--------------------------------------------------------------------
use strict;
use warnings;

use HTML::PullParser;
use Data::Dump qw(dump);

# Sample document.  The blank line after </script> is significant: the
# output listed after this sample shows three newlines of text between
# the end of <title> and the start of <h1> (one after </TITLE>, one after
# </script>, and this blank line).
my $doc = <<'EOT';
<TITLE><Foo></TITLE>
<script>
<foo>
</script>

<h1>Hi <a href="#dude">dude</a></h1>
EOT

# Build a pull parser over the in-memory document.
#   start / end     - report each tag as an [event, tagname] array ref.
#   text            - report text too; 'dtext' is the entity-decoded text,
#                     and the '@{...}' wrapper makes the token the plain
#                     string rather than a one-element array ref.
#   unbroken_text   - deliver consecutive text as a single chunk instead
#                     of several partial ones.
#   ignore_elements - skip <script> elements together with their content.
my $p = HTML::PullParser->new(
    doc             => $doc,
    start           => 'event,tagname',
    end             => 'event,tagname',
    text            => '@{dtext}',
    unbroken_text   => 1,
    ignore_elements => ['script'],
);

# Pull tokens one at a time until the document is exhausted, dumping each.
while (my $t = $p->get_token) {
    print dump($t), "\n";
}
--------------------------------------------------------------------
This prints:
["start", "title"]
"<Foo>"
["end", "title"]
"\n\n\n"
["start", "h1"]
"Hi "
["start", "a"]
"dude"
["end", "a"]
["end", "h1"]
"\n"