Perfect!!!! Thank you so much for the enlightenment!
I will study that until I have it down pat!
Thanks a million!!!
Today you receive the genius award (Sorry Rob :P)
Dan
>
> Dan Muey wrote:
>
> >
> > print "body text: @body\n"; # this needs to keep the tags where they
> > are**
> >
>
> that's fairly easy to do:
>
> #!/usr/bin/perl -w
> use strict;
>
> # The module is HTML::Parser (this line previously read "HTMP::Parser",
> # which makes perl abort at compile time because no such module exists).
> use HTML::Parser;
>
> # Sample document fed to the parser below.
> my $text = <<HTML;
> <html><head>
> <title> HI Title </title>
> heaD STUFF
> </head>
> <body bodytag=attributes>
> <i> keep the I tag </i>
> hI HERE'S CONTENT i WANT
> <img src=""> IMaGE
> <!-- i WANT TO STRIP COMMENTS OUT -->
> <SCRIPT>
>
> i DON'T WANT THIS SCRIPT EITHER
> </SCRIPT>
> <font>Hello world</font>
>
> </BODY>
> </HTMl>
> HTML
>
> # State shared between the handler subs (text, open_tag, close_tag).
> my $body  = 0;    # set to 1 by open_tag on 'body', back to 0 by close_tag
> my $title = 0;    # set to 1 by open_tag on 'title', back to 0 by close_tag
> my @body;         # text chunks and rebuilt tags collected while $body is set
> my @title;        # text chunks collected while $title is set
> my %body_attr;    # attributes of the body tag, values wrapped in single quotes
>
> # Event-driven parse: each handler receives the arguments named in its
> # argspec string.
> my $html = HTML::Parser->new(
>     api_version => 3,
>     text_h      => [ \&text,      'dtext' ],
>     start_h     => [ \&open_tag,  'tagname,attr' ],
>     end_h       => [ \&close_tag, 'tagname' ],
> );
>
> # NOTE(review): 'comment' is treated as an element name here; real
> # <!-- ... --> comments presumably never reach the output because no
> # comment handler is registered -- confirm against the HTML::Parser docs.
> $html->ignore_elements(qw(script comment));
> $html->parse($text);
> $html->eof;
>
> print "title is:\n@title\n\n";
> print "body text:\n@body\n\n";
> print "body attr:\n";
>
> # Sorted so the report comes out in the same order on every run.
> for my $name (sort keys %body_attr) {
>     print "$name=$body_attr{$name}\n";
> }
>
> # Text handler (argspec 'dtext'): receives one chunk of text.
> # Chunks without any word character are dropped. Otherwise the chunk is
> # appended to @title while $title is set, else to @body while $body is
> # set, and discarded when neither flag is set.
> sub text {
>     my ($chunk) = @_;
>
>     return if $chunk !~ /\w/;
>
>     # $title takes precedence over $body, as in an if/elsif chain.
>     my $dest = $title ? \@title
>              : $body  ? \@body
>              :          undef;
>
>     push @{$dest}, $chunk if $dest;
> }
>
> # Start-tag handler (argspec 'tagname,attr').
> #   $tagname - name of the tag being opened, as passed by the parser
> #   $attr    - hash ref of attribute name => value for that tag
> # Effects on the file-level state:
> #   'title'            sets $title
> #   'body'             sets $body and copies the attributes into
> #                      %body_attr, each value wrapped in single quotes
> #   anything else      while $body is set, pushes the rebuilt tag text
> #                      (<name key='value' ...>) onto @body
> sub open_tag {
>     my ($tagname, $attr) = @_;
>
>     $title = 1 if $tagname eq 'title';
>
>     if ($tagname eq 'body') {
>         $body = 1;
>         for my $name (keys %{$attr}) {
>             $body_attr{$name} = "'$attr->{$name}'";
>         }
>     }
>     elsif ($body) {
>         # Sort the attribute names: plain hash order is not stable, so
>         # the rebuilt tag could otherwise differ from one run to the next.
>         my $attr_text = join ' ',
>             map { "$_='$attr->{$_}'" } sort keys %{$attr};
>
>         push @body,
>             length $attr_text ? "<$tagname $attr_text>" : "<$tagname>";
>     }
> }
>
> # End-tag handler (argspec 'tagname'): receives the name of the tag
> # being closed. Clears $title on 'title' and $body on 'body'; for any
> # tag closed while $body is still set, appends "</name>" to @body.
> sub close_tag {
>     my ($tagname) = @_;
>
>     if ($tagname eq 'title') {
>         $title = 0;
>     }
>     if ($tagname eq 'body') {
>         $body = 0;
>     }
>
>     # Checked after the flags are updated, so the closing body tag
>     # itself is not recorded.
>     return unless $body;
>
>     push @body, "</$tagname>";
> }
>
> __END__
>
> prints:
>
> title is:
> HI Title
>
> body text:
> <i> keep the I tag </i>
> hI HERE'S CONTENT i WANT
> <img src=''> IMaGE
> <font> Hello world </font>
>
> body attr:
> bodytag='attributes'
>
> david
>
> --
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
>
>
--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]