I am looking into switching to Pod::Simple on search.cpan.org in the hope
it will help performance.

Attached is the first of two patches working towards that goal. It makes
the output of filter() pass W3C validation for HTML 4.01 Strict. The
main changes to achieve this are:

* Remove the <p class=pad></p> elements, as they were not valid in the
  positions where they were added. Also, any layout adjustment should be
  done via CSS, not by dummy HTML elements.

* Default $Computerese to "". I am not sure why it had the default
  it did, but the validator does not like the xml:lang attribute, and
  'und' is not a valid language code.

* Changes to how list elements are output to ensure the right
  document hierarchy.

Other changes include:

* Move the output of the version comment from do_beginning
  into do_middle so it is still output if bare_output is requested

* Added do_head so that a sub-class can add additional info into
  the header, e.g. a <link> for CSS.

I am considering adding <div class=pod></div> around the output in
do_middle. search.cpan.org does this so that css can be applied only
to the pod. I may include that in my next patch, which is to look
at links and anchors.

Graham.
--- lib/Pod/Simple/HTML.pm.orig Thu Mar 13 08:42:30 2003
+++ lib/Pod/Simple/HTML.pm      Thu Mar 13 15:13:47 2003
@@ -3,17 +3,16 @@
 package Pod::Simple::HTML;
 use strict;
 use Pod::Simple::PullParser ();
-use vars qw(@ISA %Tagmap $Computerese $Lame $Linearization_Limit $VERSION);
+use vars qw(@ISA %Tagmap $Computerese $Linearization_Limit $VERSION);
 @ISA = ('Pod::Simple::PullParser');
-$VERSION = '1.03';
+$VERSION = '1.03_01';
 
 use UNIVERSAL ();
 sub DEBUG () {0}
 
 #use utf8;
 
-$Computerese =  " lang='und' xml:lang='und'" unless defined $Computerese;
-$Lame = ' class="pad"' unless defined $Lame;
+$Computerese =  "" unless defined $Computerese;
 
 $Linearization_Limit = 90 unless defined $Linearization_Limit;
  # headings/items longer than that won't get an <a name="...">
@@ -69,11 +68,11 @@
     ]  # no point in providing a way to get <q>...</q>, I think
   ),
   
-  '/item-bullet' => "</li><p$Lame></p>\n",
-  '/item-number' => "</li><p$Lame></p>\n",
-  '/item-text'   => "</a></dt><p$Lame></p>\n",
-  'Para_item'    => "\n<dd>",
-  '/Para_item'   => "</dd><p$Lame></p>\n",
+  '/item-bullet' => "</li>\n",
+  '/item-number' => "</li>\n",
+  '/item-text'   => "</a></dt>\n",
+  'item-body'    => "\n<dd>",
+  '/item-body'   => "</dd>\n",
 
   'B'      =>  "<b>",                  '/B'     =>  "</b>",
   'I'      =>  "<i>",                  '/I'     =>  "</i>",
@@ -192,7 +191,10 @@
 sub do_middle {      # the main work
   my $self = $_[0];
   my $fh = $self->{'output_fh'};
+  my $tagmap = $self->{'Tagmap'};
   
+  print $fh $self->version_tag_comment, "<!-- start doc -->\n";
+
   my($token, $type, $tagname);
   my @stack;
   my $dont_wrap = 0;
@@ -209,7 +211,11 @@
         }
 
       } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
-        print $fh $self->{'Tagmap'}{$tagname} || next;
+        if( $stack[-1] and $tagname eq 'item-text' ) {
+          print $fh $stack[-1];
+          $stack[-1] = '';
+        }
+        print $fh $tagmap->{$tagname} || next;
 
         my @to_unget;
         while(1) {
@@ -243,23 +249,34 @@
         next;
        
       } else {
-        if( $tagname =~ m/^over-(.+)$/s ) {
-          push @stack, $1;
-        } elsif( $tagname eq 'Para') {
-          $tagname = 'Para_item' if @stack and $stack[-1] eq 'text';
+        if( $tagname =~ m/^over-/s ) {
+          push @stack, '';
+        } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
+          print $fh $stack[-1];
+          $stack[-1] = '';
         }
-        print $fh $self->{'Tagmap'}{$tagname} || next;
+        print $fh $tagmap->{$tagname} || next;
         ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
       }
 
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     } elsif( $type eq 'end' ) {
       if( ($tagname = $token->tagname) =~ m/^over-/s ) {
-        pop @stack;
-      } elsif( $tagname eq 'Para' ) {
-        $tagname = 'Para_item' if @stack and $stack[-1] eq 'text';
+        if( my $end = pop @stack ) {
+          print $fh $end;
+        }
+      } elsif( $tagname =~ m/^item-/s and @stack) {
+        $stack[-1] = $tagmap->{"/$tagname"};
+        if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
+          $self->unget_token($next);
+          if( $next->type eq 'start' and $next->tagname !~ m/^item-/s ) {
+            print $tagmap->{"/item-text"},$tagmap->{"item-body"};
+            $stack[-1] = $tagmap->{"/item-body"};
+          }
+        }
+        next;
       }
-      print $fh $self->{'Tagmap'}{"/$tagname"} || next;
+      print $fh $tagmap->{"/$tagname"} || next;
       --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
 
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -275,22 +292,36 @@
 
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
+sub do_head {
+  my $self = $_[0];
+  esc(my $title = $self->{'Title'});
+
+  print {$self->{'output_fh'}} <<HEAD;
+<title>$title</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+HEAD
+
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
 sub do_beginning {
   my $self = $_[0];
 
-  my $title = $self->get_short_title();
+  $self->{'Title'} = $self->get_short_title();
   unless($self->content_seen) {
     DEBUG and print "No content seen in search for title.\n";
     return;
   }
-  $self->{'Title'} = $title;
 
-  esc($title);
   print {$self->{'output_fh'}}
-   "<html><head>\n<title>$title</title>\n</head>\n<body>\n", 
-   $self->version_tag_comment,
-   "<!-- start doc -->\n",
-  ;
+    qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n},
+    qq{  "http://www.w3.org/TR/html4/strict.dtd">\n},
+    qq{<html><head>\n};
+
+  $self->do_head;
+  print {$self->{'output_fh'}} "</head><body>\n";
+
    # TODO: more configurability there
 
   DEBUG and print "Returning from do_beginning...\n";

Reply via email to