>From the HTML::TreeBuilder 3.08 documentation:
$root->store_comments(value)
This determines whether TreeBuilder will normally
store comments found while parsing content into $root.
Currently, this is off by default.
$root->store_declarations(value)
This determines whether TreeBuilder will normally
store markup declarations found while parsing content
into $root. Currently, this is off by default.
It is somewhat of a known bug (to be fixed one of
these days, if anyone needs it?) that declarations in
the preamble (before the "html" start-tag) end up
actually under the "html" element.
As noted in the code, this also applies to store_comments().
And yes, I need it fixed!
A haphazard patch is attached. I am now working on HTML::PrettyPrinter.
--
Reinier Post
TU Eindhoven
--- TreeBuilder.pm.orig Thu Nov 30 16:54:10 2000
+++ TreeBuilder.pm Thu Nov 30 22:46:53 2000
@@ -1050,10 +1050,67 @@
# TODO: test whether comment(), declaration(), and process(), do the right
# thing as far as tightening and whatnot.
-# Also, currently, doctypes and comments that appear before head or body
-# show up in the tree in the wrong place. Something should be done about
-# this. Tricky. Maybe this whole business of pre-making the body and
-# whatnot is wrong.
+#
+# Special code has been added in order to make doctypes, comments and
+# processing instructions that appear before head or body
+# show up in the tree in the right place. Goes to show that
+# this whole business of pre-making the body and whatnot is wrong.
+#
+# Anyway, the code tries to circumvent this by inserting two 'hooks'
+# in the code: one interferes at the moment they are parsed, the other
+# at the moment a tree is printed.
+# The first intervention is accomplished by doing all node insertions
+# through the special, undocumented _push_content() function, which
+# performs the marking; the second is implemented by overriding as_HTML()
+# (the other as_*() methods are not overridden by this patch).
+# Note that this still doesn't position all special elements correctly,
+# if no <html> tag is found at all.
+# Also, the overriding of as_HTML isn't documented to the user!
+
+# Append $e (plus @rest) to $pos, first flagging $e as '_initial' when it
+# is a '~' pseudo-element arriving under a still-implicit <html> element
+# whose element children so far are all implicit or already flagged.
+sub _push_content
+{
+  my ($pos, $e, @rest) = @_;
+
+  my $is_initial =
+       ref $e   && $e->tag =~ /^~/      # $e is a '~' pseudo-element
+    && ref $pos && $pos->tag eq 'html' && $pos->implicit
+    && !grep { ref $_ && !exists $_->{_initial} && !$_->implicit }
+         $pos->content_list;            # no explicit element sibling yet
+
+  if ($is_initial) {
+    $e->{_initial} = 1;
+    if (DEBUG) {
+      # indent the trace by one more than the number of $pos's lineage tags
+      my $depth = () = $pos->lineage_tag_names;
+      print ' ' x (1 + $depth), "Marking this element as initial.\n";
+    }
+  }
+
+  return $pos->push_content($e, @rest);
+}
+
+# Serialize as HTML.  For the <html> element, children that _push_content()
+# marked '_initial' are emitted first, ahead of the <html> start-tag.  The
+# tree is left intact: nothing is re-parented into a scratch element, and
+# plain-text children are never dereferenced.
+sub as_HTML
+{
+  my ($self, @rest) = @_;
+  return HTML::Element::as_HTML($self, @rest) if $self->tag ne 'html';
+
+  my (@initial, @regular);
+  push @{ ref $_ && $_->{_initial} ? \@initial : \@regular }, $_
+    for $self->content_list;
+  # Hide the initial children while <html> renders itself (assumes
+  # HTML::Element keeps children under '_content'); 'local' restores them.
+  local $self->{'_content'} = \@regular;
+  return join '', (map { $_->as_HTML(@rest) } @initial),
+                  HTML::Element::as_HTML($self, @rest);
+}
+
sub comment {
return if $_[0]{'_stunted'};
@@ -1080,7 +1137,7 @@
(my $e = (
$self->{'_element_class'} || 'HTML::Element'
)->new('~comment'))->{'text'} = $text;
- $pos->push_content($e);
+ &_push_content($pos,$e);
&{ $self->{'_tweak_~comment'}
|| $self->{'_tweak_*'}
@@ -1119,7 +1176,7 @@
(my $e = (
$self->{'_element_class'} || 'HTML::Element'
)->new('~declaration'))->{'text'} = $text;
- $pos->push_content($e);
+ &_push_content($pos,$e);
&{ $self->{'_tweak_~declaration'}
|| $self->{'_tweak_*'}
@@ -1154,7 +1211,7 @@
(my $e = (
$self->{'_element_class'} || 'HTML::Element'
)->new('~pi'))->{'text'} = $text;
- $pos->push_content($e);
+ &_push_content($pos,$e);
&{ $self->{'_tweak_~pi'}
|| $self->{'_tweak_*'}