"cpaul" <[EMAIL PROTECTED]> writes:

> > use HTML::PullParser;
> > use Data::Dump qw(dump);
> > 
> > $doc = <<'EOT';
> > <TITLE>Foo</TITLE>
> > <script>
> > <foo>
> > </script>
> > 
> > <h1>Hi</h1>
> > EOT
> > 
> > my $p = HTML::PullParser->new(doc => $doc,
> >       start => 'event,tagname',
> >       end   => 'event,tagname',
> >       ignore_elements => ['script'],
> >      );
> > 
> > while (my $t = $p->get_token) {
> >     print dump($t), "\n";
> > }
> > ---------------------------------------------------------------------
> > This will produce:
> > 
> > ["start", "title"]
> > ["end", "title"]
> > ["start", "h1"]
> > ["end", "h1"]
> 
> 
> how to access the "Hi" within <h1> tags?

You need to ask for text to be reported as well, by passing a `text'
argspec to the constructor.  This is an expanded sample:

--------------------------------------------------------------------
use strict;
use warnings;

use HTML::PullParser;
use Data::Dump qw(dump);

# Sample document: a title containing entities, a <script> element that the
# parser is told to ignore, and a heading with nested markup.
my $doc = <<'EOT';
<TITLE>&lt;Foo&gt;</TITLE>
<script>
<foo>
</script>

<h1>Hi <a href="#dude">dude</a></h1>
EOT

# Only the event types given an argspec here show up as tokens.
my $p = HTML::PullParser->new(
    doc   => $doc,
    start => 'event,tagname',
    end   => 'event,tagname',

    # Ask for text too.  'dtext' yields the text with entities decoded
    # (&lt;Foo&gt; is reported as "<Foo>"); wrapping the argspec in @{...}
    # makes each text token a plain string instead of an array reference.
    text => '@{dtext}',

    # Report a run of text as one token rather than in separate pieces
    # (see the HTML::Parser documentation for the exact guarantees).
    unbroken_text => 1,

    ignore_elements => ['script'],
);

# Text tokens are plain strings, so test for definedness rather than truth:
# a text chunk of "0" would otherwise terminate the loop prematurely.
while (defined(my $t = $p->get_token)) {
    print dump($t), "\n";
}
--------------------------------------------------------------------

This prints:

["start", "title"]
"<Foo>"
["end", "title"]
"\n\n\n"
["start", "h1"]
"Hi "
["start", "a"]
"dude"
["end", "a"]
["end", "h1"]
"\n"

Reply via email to