My module works well enough, but I'm getting to the point where I need multiple parse trees existing at the same time in a mod_perl environment. The way my module is now, they could get mixed up, because I can't find a way to pass a custom variable to the event handler subroutines of HTML::Parser.
I've figured that if I subclass it, I can create a new object for each parse tree instead of just returning an array reference. Here is my current code:
package SkylineEdit;

use strict;
use warnings;

# Exporter must be loaded explicitly: inheriting from it via @ISA alone only
# works if some other module happens to have loaded it first.
use Exporter ();
use HTML::Parser ();

our @ISA    = ('Exporter');
our @EXPORT = ('html_to_htmltree', 'htmltree_to_html', 'get_node_content', 'set_node_content');

# File-level state shared by the parser handlers and the serialiser below.
my $htmltree;      # tree built by the current html_to_htmltree() call
my $node;          # content list that new elements and text are appended to
my @prevnodes;     # stack of enclosing content lists (innermost last)
my $htmloutput;    # HTML text accumulated by descend_htmltree()
# HTML::Parser start-tag handler (registered with the "tagname, attr" argspec).
#
# Builds a node hash for the element -- { tag => NAME, content => [], plus one
# key per attribute } -- appends it to the content list currently being
# filled, and makes the new node's own content list current so that the
# events that follow nest inside it.
#
#   $tagname - element name
#   $attr    - hash ref of attribute name => value
sub start {
    my ($tagname, $attr) = @_;

    my $newnode = { tag => $tagname };
    foreach my $key (keys %{$attr}) {
        # 'tag' and 'content' are the node's own bookkeeping slots. An HTML
        # attribute with one of those names must not overwrite them, or the
        # element would be serialised under the attribute's value.
        next if $key eq 'tag' || $key eq 'content';
        $newnode->{$key} = $attr->{$key};
    }
    $newnode->{content} = [];

    push @prevnodes, $node;         # remember the enclosing content list
    push @{$node}, $newnode;        # attach the element to its parent
    $node = $newnode->{content};    # descend into the new element
    return;
}
# HTML::Parser end-tag handler (registered with the "tagname" argspec).
#
# Closes the innermost open element named $tagname and makes its parent's
# content list current again. Elements opened after it that never received
# an end tag (e.g. <br>, <img>) are closed implicitly along the way. An end
# tag that matches no open element is ignored instead of unwinding the stack
# past the document root.
#
#   $tagname - element name of the end tag
sub end {
    my $tagname = shift;

    # start() pushes the enclosing content list onto @prevnodes and then
    # appends the new element to that list, so the element open at stack
    # depth $depth is always the last entry of $prevnodes[$depth]. Slot 0 is
    # the top-level list holding the synthetic 'document' node, which must
    # never be closed, hence the search stops at depth 1.
    for my $depth (reverse 1 .. $#prevnodes) {
        my $open = $prevnodes[$depth]->[-1];
        next unless ref($open) eq 'HASH' && $open->{tag} eq $tagname;

        $node       = $prevnodes[$depth];    # back to the parent's content
        $#prevnodes = $depth - 1;            # drop it and everything deeper
        return;
    }

    return;    # stray end tag: nothing to close
}
# HTML::Parser text handler (registered with the "dtext" argspec).
#
# chomp()s the text and, if anything is left, appends it as a plain string
# to the content list currently being filled.
#
#   $text - text run reported by the parser
sub text {
    my $text = shift;

    chomp $text;
    # Disabled alternative kept for reference; it must stay on its own line
    # so that it does not comment out the statements that follow it.
    # $text =~ s/(^\n|\n$)//gs;
    if ($text ne '') {
        push @{$node}, $text;
    }
    return;
}
# Replaces the content of one node in a tree with a single item.
#
#   $htmltree - array ref whose first element is the root node hash
#   $node     - dotted node id such as "0.1.0"; a leading "<digits>." prefix
#               is dropped and each remaining number is used as an index
#               into the 'content' list of the node reached so far
#   $content  - value that becomes the sole entry of the target's content
#
# Returns $htmltree (the same reference, modified in place).
sub set_node_content {
    my ($htmltree, $node, $content) = @_;

    # Drop the leading root component of the id.
    $node =~ s/^\d+\.//;

    # Walk down from the root, one child index per path component.
    my $target = $htmltree->[0];
    for my $index ($node =~ /(\d+)\.?/g) {
        $target = $target->{content}->[$index];
    }

    $target->{content} = [$content];
    return $htmltree;
}
# Returns the HTML for the content of one node, without editing decoration.
#
#   $htmltree - array ref whose first element is the root node hash
#   $node     - dotted node id such as "0.1.0"; a leading "<digits>." prefix
#               is dropped and each remaining number is used as an index
#               into the 'content' list of the node reached so far
#
# A third argument was historically accepted but never used; any extra
# arguments are still ignored.
#
# Returns the serialised HTML as a string.
sub get_node_content {
    my ($htmltree, $node) = @_;

    my $tmpnode = $htmltree->[0];
    $node =~ s/^\d+\.//;
    while ($node =~ /(\d+)\.?/cg) {
        $tmpnode = $tmpnode->{content}->[$1];
    }

    # descend_htmltree() appends to the shared accumulator, so start from an
    # empty string; otherwise the result would also contain whatever an
    # earlier serialisation left behind.
    $htmloutput = '';
    descend_htmltree($tmpnode->{content}, 0, "");
    return $htmloutput;
}
# Serialises a list of tree nodes to HTML by appending to the file-level
# $htmloutput accumulator, recursing into each element's content.
#
#   $node           - array ref of children: hash refs are elements
#                     ({ tag, content, plus one key per attribute });
#                     anything else is appended verbatim as text
#   $withclickiness - true to wrap 'table' elements, and the content of 'p'
#                     and 'td' elements, in bordered <div>s; the p/td wrapper
#                     carries the double-click handler that opens the editor
#   $node_id        - dotted id of the parent; children are numbered
#                     "$node_id.0", "$node_id.1", ...
#
# Attributes are written in sorted order so the output is reproducible, and
# '&' and '"' in their values are escaped so a value cannot terminate its
# own quoting. Void elements get no end tag.
sub descend_htmltree {
    my ($node, $withclickiness, $node_id) = @_;

    my $colors = { td => '#ff0000', p => '#aaaaaa', table => '#ff0000' };

    # Elements that never take an end tag in HTML.
    my %is_void = map { $_ => 1 }
        qw(area base br col embed hr img input link meta param source track wbr);

    my $node_counter = 0;
    foreach my $tmpnode (@{$node}) {
        if (ref($tmpnode) eq 'HASH') {
            my $tag    = $tmpnode->{tag};
            my $nodeid = "${node_id}.${node_counter}";

            my $frame_table = $withclickiness && $tag eq 'table';
            my $frame_text  = $withclickiness && ($tag eq 'p' || $tag eq 'td');

            $htmloutput .= "<div style='border: thin solid " . $colors->{$tag} . "; margin: 1px 1px 1px 1px'>"
                if $frame_table;

            # Start tag: every key except the two bookkeeping slots is an
            # attribute.
            $htmloutput .= "<$tag";
            foreach my $name (sort keys %{$tmpnode}) {
                next if $name eq 'tag' || $name eq 'content';
                my $value = defined $tmpnode->{$name} ? $tmpnode->{$name} : '';
                $value =~ s/&/&amp;/g;
                $value =~ s/"/&quot;/g;
                $htmloutput .= " $name=\"$value\"";
            }
            $htmloutput .= ">";

            $htmloutput .= "<div style='padding: 1px 1px 1px 1px; border: thin solid " . $colors->{$tag} . "; margin: 1px 1px 1px 1px' onDblClick=\"parent.location = '/editor/editfile.html?action=edittext&node=${nodeid}&tmpfile='+tmpfile+'&filename='+filename\">"
                if $frame_text;

            descend_htmltree($tmpnode->{content}, $withclickiness, $nodeid);

            $htmloutput .= "</div>" if $frame_text;
            $htmloutput .= "</$tag>" unless $is_void{$tag};
            $htmloutput .= "</div>" if $frame_table;
        }
        else {
            # Text node: appended as-is.
            $htmloutput .= "$tmpnode";
        }
        $node_counter++;
    }
    return;
}
# Serialises a whole tree to HTML and optionally writes it to a file.
#
#   $filename       - output path; empty or omitted means "do not write"
#   $withclickiness - true to add the editing <div> decoration (default 0)
#   $htmltree       - array ref whose first element is the root node hash
#
# Returns the HTML as a string. Dies if the output file cannot be opened or
# fully written.
sub htmltree_to_html {
    my $filename       = shift || '';
    my $withclickiness = shift || 0;
    my $htmltree       = shift;

    # descend_htmltree() appends to the shared accumulator; reset it so that
    # repeated calls do not return the previous output as well.
    $htmloutput = '';
    descend_htmltree($htmltree->[0]->{content}, $withclickiness, "0");

    if ($filename ne '') {
        # Three-argument open: the file name can never be read as a mode.
        open my $out, '>', $filename
            or die "Can't open $filename for HTML output: $!";
        print {$out} $htmloutput;
        # Buffered write errors only surface when the handle is closed.
        close $out
            or die "Can't finish writing $filename: $!";
    }

    return $htmloutput;
}
# Parses HTML into a tree of nested node hashes.
#
#   $filename - file to read the HTML from; when empty or undefined, $html
#               is parsed instead
#   $html     - HTML text, used only when no file name is given
#
# Returns an array ref holding one root node,
# { tag => 'document', content => [...] }, or undef when the input contains
# a tag of the form <%word>. Dies if the input file cannot be opened.
sub html_to_htmltree {
    my $filename = shift;
    my $html     = shift || '';
    # my $rightpane = shift || 0;
    # my $htmltree;

    # Fresh shared state for this parse; the start/end/text handlers fill it.
    $htmltree   = [ { tag => 'document', content => [] } ];
    $node       = $htmltree->[0]->{content};
    @prevnodes  = ($htmltree);
    $htmloutput = "";

    my $p = HTML::Parser->new(
        api_version => 3,
        start_h     => [\&start, "tagname, attr"],
        end_h       => [\&end,   "tagname"],
        text_h      => [\&text,  "dtext"],
    );

    if (defined $filename && $filename ne '') {
        # Three-argument open with a lexical handle: the file name can never
        # be read as a mode, and the handle cannot clash with another one.
        open my $in, '<', $filename
            or die "Can't open input HTML file $filename: $!";
        $html = do { local $/; <$in> };    # slurp the whole file
        $html = '' unless defined $html;   # an empty file yields undef
        close $in;
        # $html =~ s|(</?)%(\w+?>)|${1}_${2}|sg;
    }

    # Kept as 'return undef' so list-context callers still receive one value.
    return undef if ($html =~ /<\%\w+?>/s);

    $p->parse($html);
    $p->eof;

    return $htmltree;
}
1;
What changes do I need to make so that I can do something like the following? Thanks for any help.
use SkylineEdit;
my $htmltree = SkylineEdit->new; $htmltree->html_to_htmltree($somefile);
-- Andrew Gaffney Network Administrator Skyline Aeronautics, LLC. 636-357-1548
-- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] <http://learn.perl.org/> <http://learn.perl.org/first-response>