Perfect!!!! Thank you so much for the enlightenment!
I will study that until I have it down pat!

Thanks a million!!!
Today you receive the genius award (Sorry Rob :P)

Dan

> 
> Dan Muey wrote:
> 
> > 
> > print "body text: @body\n"; # this needs to keep the tags where they 
> > are**
> >
> 
> that's fairly easy to do:
> 
> #!/usr/bin/perl -w
> use strict;
> 
> use HTML::Parser;
> 
> my $text = <<HTML;
> <html><head>
> <title> HI Title </title>
> heaD STUFF
>  </head>
>  <body bodytag=attributes>
> <i> keep the I tag </i>
>  hI HERE'S CONTENT i WANT
> <img src=""> IMaGE
>  <!-- i WANT TO STRIP COMMENTS OUT -->
>  <SCRIPT>
> 
>  i DON'T WANT THIS SCRIPT EITHER
>  </SCRIPT>
>  <font>Hello world</font>
> 
>  </BODY>
>  </HTMl>
> HTML
> 
> my $body = 0;
> my $title = 0;
> my @body;
> my @title;
> my %body_attr;
> 
> my $html = HTML::Parser->new(api_version => 3,
>                                 text_h => [\&text,'dtext'],
>                                 start_h => [\&open_tag, 
> 'tagname,attr'],
>                                 end_h   => [\&close_tag, 'tagname']);
> $html->ignore_elements(qw(script comment)); 
> $html->parse($text); $html->eof;
> 
> print "title is:\n@title\n\n";
> print "body text:\n@body\n\n";
> print "body attr:\n";
> while(my($k,$v) = each %body_attr){
>         print "$k=$v\n";
> }
> 
> sub text{
> 
>         my $text = shift;
> 
>         return unless($text =~ /\w/);
> 
>         if($title){
>                 push(@title,$text);
>         }elsif($body){
>                 push(@body,$text);
>         }
> }
> 
> sub open_tag{
> 
>         my $tagname = shift;
>         my $attr    = shift;
> 
>         $title = 1 if($tagname eq 'title');
> 
>         if($tagname eq 'body'){
>                 $body = 1;
>                 while(my($key,$value) = each %{$attr}){
>                         $body_attr{$key} = "'$value'";
>                 }
>         }elsif($body){
>                 my $t = '';
>                 while(my($key,$value) = each %{$attr}){
>                         $t .= "$key='$value' ";
>                 }
>                 $t =~ s/\s$//;
>                 push(@body,"<$tagname" . ($t ? " $t>" : '>'));
>         }
> }
> 
> sub close_tag{
> 
>         my $tagname = shift;
> 
>         $title = 0 if($tagname eq 'title');
>         $body  = 0 if($tagname eq 'body');
> 
>         push(@body,"</$tagname>") if($body);
> }
> 
> __END__
> 
> prints:
> 
> title is:
>  HI Title
> 
> body text:
> <i>  keep the I tag  </i>
>  hI HERE'S CONTENT i WANT
>  <img src=''>  IMaGE
>   <font> Hello world </font>
> 
> body attr:
> bodytag='attributes'
> 
> david
> 
> -- 
> To unsubscribe, e-mail: [EMAIL PROTECTED]
> For additional commands, e-mail: [EMAIL PROTECTED]
> 
> 

--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to