On Tue, 5 Sep 2000, Barrie Slaymaker wrote: BS>Brad Appleton wrote: BS>> BS>> Marek has been doing similar work on an experimental BS>> Pod::Compiler module that sounds suspiciously like your BS>> Pod::StdParser. Definitely make sure you look at each BS>> other's code before going too much further. BS> BS>I haven't seen it published anywhere, but his previous description sounds very much BS>DOMish. This isn't DOMish: it doesn't build a tree of objects, it is a lot like BS>Pod::Parser in that it generates events during parsing and provides an OO scaffolding BS>for handling those events (by overloading at this time, perhaps by callback later). I see your point, but if you think of complex converters like HTML, FrameMaker (MIF) or LaTeX, then it is *extremely* handy to have a tree of objects, so you always have the full information about nesting, sisters etc easily accessible. And for hyperlinking you need two passes anyway, and thanks to Storable you can conveniently store the whole tree and run the second pass over it as soon as all hyperlink destinations have been computed. I don't want to go into the details, but I have certainly a lot of pros for an OO approach. I haven't coded Pod::Compiler into my Marek::Pod::HTML yet, but I expect a dramatic increase in performance when converting e.g. the whole Perl distribution PODs. There are two differing requirements here: On the one hand, we need a fast! parser for online viewing, on the other a base framework for complex converters to avoid coding the same all over again. BS>If Marek will point me to his code, I'll gladly peruse it. I've included my 0.01 of Pod::Compiler and a sample script (sorry, no module package) for you to have a look at. Yes, there are a lot of TODOs. BS>Do you have any feedback on Pod::StdParser & Pod::Tests in the meanwhile? I've had a brief look: Yes, it is a sensible extension of Pod::Parser, but it uses callbacks rather than creating a tree. The functionality of Pod::StdParser could quite easily be included in Pod::Compiler, IMHO. The big question is: Could the tree-approach satisfy all requirements or do we need separate, highly optimized converters for every single output format? -Marek
#!/opt/perl_5.6.0/bin/perl -w use strict; use Marek::Pod::Compiler qw(pod_compile); my $root = pod_compile(shift @ARGV, \*STDOUT); $root->walk_down({ callback => \&_disp, _depth => 0 }); my $file = "/tmp/pod$$"; print "storing to $file\n"; $root->store($file); print "retrieving $file\n"; my $newroot = Pod::root->read($file); print $newroot->as_pod,"\n"; print "\n\nNODES:\n"; my %nodes = $newroot->nodes(); foreach(keys %nodes) { print " $_ $nodes{$_}\n"; } sub _disp { my ($node,$href) = @_; my $indent = ' ' x $href->{_depth}; print $indent,ref($node),"\n"; 1; }
# -*- perl -*- ############################################################################# # Pod/Compiler.pm -- compiles POD into an object tree # # Copyright (C) 2000 by Marek Rouchal. All rights reserved. # This package is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. ############################################################################# package Marek::Pod::Compiler; =head1 NAME Marek::Pod::Compiler - compile POD into an object tree =head1 SYNOPSIS use Marek::Pod::Compiler; =head1 DESCRIPTION THIS IS PRELIMINARY SOFTWARE! The Marek:: namespace is strictly preliminary until a regular place in CPAN is found. =cut use strict; require Exporter; require Pod::Parser; require Tree::DAG_Node; #$Tree::DAG_Node::Debug = 1; require Storable; $Marek::Pod::Compiler::VERSION = '0.01'; @Marek::Pod::Compiler::ISA = qw(Exporter Pod::Parser); @Marek::Pod::Compiler::EXPORT = qw(); @Marek::Pod::Compiler::EXPORT_OK = qw(pod_compile); ############################################################################## sub pod_compile { my ($infile) = @_; my $compiler = Marek::Pod::Compiler->new(); $compiler->parse_from_file($infile,\*STDOUT); $compiler->root(); } ##------------------------------- ## Method definitions begin here ##------------------------------- # set up a new object sub new { my $this = shift; my $class = ref($this) || $this; my %params = @_; my $self = {%params}; bless $self, $class; $self->initialize(); return $self; } # initalize, set defaults sub initialize { my $self = shift; ## Options # the POD name $self->{name} ||= ''; # no parser errors here, we've seen them in the first pass $self->SUPER::errorsub(sub { return 1; }); } =item name() Set/retrieve the C<-name> property, i.e. the canonical Pod name (e.g. C<Pod::HTML>). =cut # set and/or retrieve canonical name of POD sub name { return (@_ > 1) ? ($_[0]->{name} = $_[1]) : $_[0]->{name}; } sub root { return (@_ > 1) ? ($_[0]->{_root} = $_[1]) : $_[0]->{_root}; } sub current { return (@_ > 1) ? ($_[0]->{_current} = $_[1]) : $_[0]->{_current}; } ## overrides for Pod::Parser # things to do at start of POD sub begin_input { my $self = shift; $self->current($self->root(Pod::root->new())); } # things to do at end of POD sub end_pod { my $self = shift; 1; } # expand a POD command sub command { my ($self, $cmd, $paragraph, $line, $pod_para) = @_; my ($file) = $pod_para->file_line; # Check the command syntax $paragraph =~ s/\s+$//s; if($cmd eq 'over') { # check argument my $indent = 4; # default if($paragraph =~ /^\s*(\d+)$/s) { $indent = $1; } else { warn "Not a numeric argument for =over: '$paragraph'\n"; } # start a new list my $list = Pod::list->new; $list->indent($indent); $list->start($line); # remember start line of list $self->current->add_daughter($list); $self->current($list); # make list current context } elsif($cmd eq 'item') { # are we in a list? my $current = $self->current; if($current->isa('Pod::begin')) { $current = $current->mother; } unless($current->isa('Pod::list')) { warn "=item without previous =over\n"; # auto-open a list my $list = Pod::list->new; $list->start($line); $list->autoopen(1); $self->current->add_daughter($list); $self->current($list); } my $list = $self->current; # check whether the previous item had some contents my $last_item = ($list->daughters)[-1]; if($last_item && $last_item->isa('Pod::item') && !($last_item->daughters)) { warn "previous =item has no contents\n"; } # TODO non-items in =over/=back # check argument # TODO killing of first characters according to list type?!? my $type = 'definition'; if($paragraph =~ /\S/s) { if($paragraph =~ s/^\s*[*](\s+|$)//s) { $type = 'bullet'; } elsif($paragraph =~ s/^\s*([1-9]\d*)\.?(\s+|$)//s) { # TODO verify numbering $type = "number"; } } else { warn "No argument for =item\n"; } my $first = $list->type(); unless($first) { $list->type($type); } elsif($first ne $type) { warn "=item type mismatch ('$first' vs. '$type')\n"; $list->type($type) if($type =~ /^definition/ || ($type =~ /^number/ && $first =~ /^bullet/)); } # add this item my $item = Pod::item->new; $item->add_daughters($self->interpolate($paragraph, $line, $file)); $list->add_daughter($item); if($first =~ /^definition/ && $item->contents_as_text !~ /\S/s) { warn "no text content in =item\n"; } } elsif($cmd eq 'back') { # check if we have an open list unless($self->current->isa('Pod::list')) { warn "=back without previous =over\n"; } else { # check for spurious characters if($paragraph =~ /\S/s) { warn "Spurious character(s) after =back\n"; } # close list my $list = $self->current; $self->current($list->mother); # check for empty lists if(!($list->daughters)) { warn "No contents in =over/=back list\n"; } } # end in_list } elsif($cmd =~ /^head([1-9]\d*)/) { my $level = $1; # TODO check whether the previous =head section had some contents # - keep track of current head contents and level # and compare with this one # "empty section in previous paragraph" # check if there is an open list my $current = $self->current; while($current->isa('Pod::list')) { unless($current->autoopen) { warn "=over on line ". $current->start() . " without closing =back (at $cmd)\n"; } $current = $self->current($current->mother); } # check contents my $head = Pod::head->new($level); $head->add_daughters($self->interpolate($paragraph,$line,$file)); if(length($head->contents_as_text)) { $current->add_daughter($head); # TODO remember current heading } else { warn "empty =$cmd\n"; } } elsif($cmd eq 'begin') { if($self->current->isa('Pod::begin')) { # already have a begin warn "Nested =begin's (first at line TODO start)\n"; } else { # check for argument my ($type,$args); if($paragraph =~ /^\s*(\S+)\s*(.*)/) { ($type,$args) = ($1,$2); } else { warn "No argument for =begin\n"; } # remember the =begin my $begin = Pod::begin->new; $begin->type($type); $begin->args($args); $begin->line($line); $self->current->add_daughter($begin); $self->current($begin); } } elsif($cmd eq 'end') { if($self->current->isa('Pod::begin')) { # close the existing =begin $self->current($self->current->mother); # check for spurious characters # the closing argument is optional # TODO check opening/closing types if($paragraph =~ /^\s*(\S+)\s*(.*)$/ && $2) { warn "Spurious character(s) after '=end $1'\n"; } } else { # don't have a matching =begin warn "=end without =begin\n"; } } elsif($cmd eq 'for') { my ($type,$args); if($paragraph =~ s/^\s*(\S+)[ \t]*([^\n]*)\n*//s) { ($type,$args) = ($1,$2); $args =~ s/\s+$//s; } else { warn "=for without formatter specification\n"; $paragraph = ''; # do not expand paragraph below } if($paragraph =~ /\S/s) { my $forp = Pod::for->new; $forp->type($type); $forp->args($args); $forp->content($paragraph); # TODO check if in list?!? $self->current->add_daughter($forp); } } elsif($cmd =~ /^(pod|cut)$/) { # check for argument if($paragraph =~ /\S/) { warn "Spurious text after =$cmd\n"; } } else { warn "Invalid pod command '$cmd'\n"; } } # process a verbatim paragraph sub verbatim { my ($self, $paragraph, $line, $pod_para) = @_; my ($file) = $pod_para->file_line; # strip trailing whitespace $paragraph =~ s/\s+$//s; unless($paragraph =~ /\S/) { # just an empty line return 1; } # make verbatim par my $verbobj; my $lastobj = ($self->current->daughters)[-1]; if($lastobj && $lastobj->isa('Pod::verbatim')) { # recycle previous verbatim paragraph $verbobj = $lastobj; $verbobj->addline(''); } else { $verbobj = Pod::verbatim->new; $self->current->add_daughter($verbobj); } foreach(split(/[\r\n]+/, $paragraph)) { s/\s*$//s; $verbobj->addline($_); } 1; } # a regular text paragraph sub textblock { my ($self, $paragraph, $line, $pod_para) = @_; my ($file) = $pod_para->file_line; $paragraph =~ s/\s+$//s; my $current = $self->current; # TODO check context (list, begin) if($current->isa('Pod::begin')) { $current->addchunk($paragraph); } else { my $par = Pod::paragraph->new; $par->add_daughters($self->interpolate($paragraph,$line,$file)); # check for non-empty content if($par->contents_as_text =~ /\S/) { $current->add_daughter($par); } else { warn "no text contents in paragraph\n"; } } } # expand a POD text string sub interpolate { my ($self,$paragraph,$line,$file) = @_; # expand the interior sequences $self->_expand_ptree($self->parse_text($paragraph,$line), $line, $file, ''); } sub _expand_ptree { my ($self,$ptree,$line,$file,$nestlist) = @_; local($_); my @obj = (); # process each node in the parse tree foreach(@$ptree) { # regular text chunk unless(ref) { my $chunk = $_; # TODO do magic linebreaking #while($chunk =~ s/^([^\n]*)\n([ \t]+)//) { # my ($line,$indent) = ($1,$2); #} # TODO escape whitespace if in S<> #if($nestlist =~ /S/) { #} my $str = Pod::string->new($chunk); $str->nbsp(1) if($nestlist =~ /S/); $str->nestlist($nestlist); push(@obj, $str); next; # finished this chunk } # have an interior sequence my $cmd = $_->cmd_name(); my $contents = $_->parse_tree(); my $file; ($file,$line) = $_->file_line(); # an entity if($cmd eq 'E') { my $str = $contents->raw_text(); my $ent = Pod::entity->decode($str); # TODO recyle string if "normal" ascii character unless($ent) { warn "unrecognized entity '$str'\n"; } else { push(@obj, $ent); } } # a hyperlink elsif($cmd eq 'L') { # try to parse the hyperlink $_ = $contents->raw_text(); # collapse newlines with whitespace if(s/\s*\n+\s*/ /g) { warn "collapsing newlines to blanks\n"; } # strip leading/trailing whitespace if(s/^[\s\n]+|[\s\n]+$//) { warn "ignoring leading/trailing whitespace in link\n"; } unless(length($_)) { warn "ERROR: empty link\n"; next; } # Check for different possibilities. This is tedious and error-prone # we match all possibilities (alttext, page, section/item) #warn "DEBUG: link=$_\n"; # only page # problem: a lot of people use (), or (1) or the like to indicate # man page sections. But this collides with L<func()> that is supposed # to point to an internal funtion... my $page_rx = '[\w.]+(?:::[\w.]+)*(?:[(](?:\d\w*|)[)]|)'; my $url_rx = '(?:http|ftp|mailto|news):.+'; my ($alttext,$page,$type,$node); if(m!^($page_rx)$!o) { $page = $1; $type = 'page'; } # alttext, page and "section" elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { ($alttext, $page, $node) = ($1, $2, $3); $type = 'section'; } # alttext and page elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) { ($alttext, $page) = ($1, $2); $type = 'page'; } # alttext and "section" elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { ($alttext, $node) = ($1,$2); $type = 'section'; } # page and "section" elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) { ($page, $node) = ($1, $2); $type = 'section'; } # page and item elsif(m!^($page_rx)\s*/\s*(.+)$!o) { ($page, $node) = ($1, $2); $type = 'item'; } # only "section" elsif(m!^/?"(.+)"$!) { $node = $1; $type = 'section'; } # only item elsif(m!^\s*/(.+)$!) { $node = $1; $type = 'item'; } # non-standard: URL elsif(m!^($url_rx)$!io) { $node = $1; $type = 'url'; } # alttext, page and item elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { ($alttext, $page, $node) = ($1, $2, $3); $type = 'item'; } # alttext and item elsif(m!^(.*?)\s*[|]\s*/(.+)$!) { ($alttext, $node) = ($1,$2); } # nonstandard: alttext and url elsif(m!^(.*?)\s*[|]\s*($url_rx)$!o) { ($alttext, $node) = ($1,$2); $type = 'url'; } # must be an item or a "malformed" section (without "") else { $node = $_; $type = 'item'; } # empty alternative text expands to node name or page name if(defined $alttext) { if(!length($alttext)) { $alttext = $node | $page; } elsif($alttext =~ m:[|/]:) { warn "alternative text '$alttext' contains non-escaped | or /\n"; } } if(defined $page && $page =~ /[(]\w*[)]$/) { warn "(section) in '$page' deprecated\n"; } if(defined $node && $node =~ m:[|/]:) { warn "node '$node' contains non-escaped | or /\n"; } my $link = Pod::link->new; if(defined $alttext) { my @txt = $self->interpolate($alttext,$line,$file); $link->alttext(@txt); } $link->page($page) if($page); if($node) { # collapse markup my $txt = join('', map { $_->as_text } $self->interpolate($node,$line,$file)); $txt =~ s/\s+/ /gs; # collapse whitespace $txt =~ s/^\s+|\s+$//gs; # trim whitespace $link->node($node); } $link->type($type); push(@obj, $link); } # bold text elsif($cmd eq 'B') { if($nestlist =~ /B/) { warn "...B<...B<...>...>...\n"; } my $bold = Pod::bold->new; $bold->nestlist($nestlist); $bold->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'B')); push(@obj, $bold); } # code text elsif($cmd eq 'C') { if($nestlist =~ /C/) { warn "...C<...C<...>...>...\n"; } my $code = Pod::code->new; $code->nestlist($nestlist); $code->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'C')); push(@obj, $code); } # file text elsif($cmd eq 'F') { if($nestlist =~ /F/) { warn "...F<...F<...>...>...\n"; } my $file = Pod::file->new; $file->nestlist($nestlist); $file->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'F')); push(@obj, $file); } # italic text elsif($cmd eq 'I') { # TODO I<...I<...>...> should be expanded to # <I>...</I>...<I>...</I> - according to Achim Bohnet if($nestlist =~ /I/) { warn "...I<...I<...>...>...\n"; } my $italic = Pod::italic->new; $italic->nestlist($nestlist); $italic->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'I')); push(@obj, $italic); } # non-breakable space elsif($cmd eq 'S') { # note: sets also special flag in strings if($nestlist =~ /S/) { warn "...S<...S<...>...>...\n"; } my $nbsp = Pod::nonbreaking->new; $nbsp->nestlist($nestlist); $nbsp->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'S')); push(@obj, $nbsp); } # zero-size element elsif($cmd eq 'Z') { # TODO check that it has no content my $zero = Pod::zero->new; $zero->nestlist($nestlist); push(@obj, $zero); } # custom index entries # TODO these should run also throgh Pod::Checker and result in # valid L<...> destinations elsif($cmd eq 'X') { # tag this with a unique identifier and add it to the index if($nestlist =~ /X/) { warn "...X<...X<...>...>...\n"; } my $idx = Pod::idx->new; $idx->nestlist($nestlist); $idx->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'X')); push(@obj, $idx); } else { # ignore everything else warn "Invalid command '$cmd'\n"; } } # end for ptree @obj; } =head1 SEE ALSO L<Pod::Checker>, L<Pod::Parser>, L<Pod::Find>, L<pod2man>, L<pod2text>, L<Pod::Man> =head1 AUTHOR Marek Rouchal E<lt>[EMAIL PROTECTED]<gt> =head1 HISTORY A big deal of this code has been recycled from a variety of existing Pod converters, e.g. by Tom Christiansen and Russ Allbery. A lot of ideas came from Nick Ing-Simmons' B<PodToHtml>, e.g. the usage of the B<HTML::Element> module and the B<HTML::FormatPS> module. Without the B<Pod::Parser> module by Brad Appleton and the B<HTML::Element> module by Gisle Aas this module would not exist. =cut ############################################################################## package Pod::_obj; @Pod::_obj::ISA = qw(Tree::DAG_Node); sub new { my $this = shift; my $class = ref($this) || $this; my @params = @_; my $self = $class->SUPER::new; bless $self, $class; $self->initialize(@params); return $self; } sub initialize { 1; } sub line { return (@_ > 1) ? ($_[0]->{_line} = $_[1]) : $_[0]->{_line}; } sub contents_as_pod($) { $_[0]->_as_pod; } sub _as_pod($;$$) { my ($self,$first,$last) = @_; my $str = defined $first ? $first : ''; foreach($self->daughters) { $str .= $_->as_pod; } $str .= defined $last ? $last : ''; $str; } sub contents_as_text($) { my $text = $_[0]->_as_text; $text =~ s/^\s+|\s+$//sg; $text =~ s/\s+/ /sg; $text; } sub _as_text($$) { my ($self,$first,$last) = @_; my $text = $first || ''; foreach($self->daughters) { $text .= $_->as_text; } $text .= $last || ''; $text; } ############################################################################## package Pod::root; @Pod::root::ISA = qw(Pod::_obj Storable); sub initialize { 1; } sub as_pod($) { shift->_as_pod()."=cut\n"; } sub as_text($) { shift->_as_text(''); } sub read($$) { my ($pseudoclass,$file) = @_; my $obj = Storable::retrieve($file); $obj; } sub nodes { my $self = shift; my %nodes = (); $self->walk_down({ callback => \&_get_nodes, _depth => 0, _nodes => \%nodes }); %nodes; } sub _get_nodes { my ($node,$href) = @_; if($node->isa("Pod::head")) { my $txt = $node->as_text; $href->{_nodes}->{$txt} = 1; return 0; } elsif($node->isa("Pod::item")) { my $txt = $node->as_text; $href->{_nodes}->{$txt} = 2; return 0; } elsif($node->isa("Pod::idx")) { my $txt = $node->as_text; $href->{_nodes}->{$txt} = 3; return 0; } 1; } ############################################################################## package Pod::paragraph; @Pod::paragraph::ISA = qw(Pod::_obj); sub initialize { 1; } sub as_pod($) { my $self = shift; $self->_as_pod($self->my_daughter_index?'':"=pod\n\n","\n\n"); } sub as_text($) { my $self = shift; $self->_as_text(''); } ############################################################################## package Pod::verbatim; @Pod::verbatim::ISA = qw(Pod::_obj); sub initialize { shift->{_content} = []; } sub addline($$) { push(@{$_[0]->{_content}}, $_[1]); } sub content($@) { my $self = shift; if(@_) { if(defined $_[0]) { @{$self->{_content}} = @_; } else { $self->{_content} = []; } } @{$self->{_content}}; } sub as_pod($) { my $self = shift; ($self->my_daughter_index?'':"=pod\n\n"). join("\n",$self->content)."\n\n"; } sub as_text($) { my $self = shift; join("\n",$self->content); } ############################################################################## package Pod::head; @Pod::head::ISA = qw(Pod::_obj); sub initialize { $_[0]->level($_[1] || 1); } sub level($$) { return (@_ > 1) ? ($_[0]->{_level} = $_[1]) : $_[0]->{_level}; } sub as_pod($) { my $self = shift; $self->_as_pod("=head".$self->level." ","\n\n"); } sub as_text($) { my $self = shift; $self->_as_text(''); } ############################################################################## package Pod::list; @Pod::list::ISA = qw(Pod::_obj); sub initialize { $_[0]->autoopen(0); $_[0]->type(''); $_[0]->indent(4); } sub autoopen($$) { return (@_ > 1) ? ($_[0]->{_auto} = $_[1]) : $_[0]->{_auto}; } sub indent($$) { return (@_ > 1) ? ($_[0]->{_indent} = $_[1]) : $_[0]->{_indent}; } sub start($$) { return (@_ > 1) ? ($_[0]->{_start} = $_[1]) : $_[0]->{_start}; } sub type($$) { return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type}; } sub as_pod($) { my $self = shift; $self->_as_pod("=over ".$self->indent."\n\n","=back\n\n") } sub as_text($) { my $self = shift; $self->_as_text(''); } ############################################################################## package Pod::item; @Pod::item::ISA = qw(Pod::_obj); sub as_pod($) { my $self = shift; my $mum = $self->mother; my $prefix = ''; if($mum) { my $type = $mum->type; if($type =~ /^bullet/) { $prefix = '*'; } elsif($type =~ /^number/) { $prefix = ($self->my_daughter_index+1).'.'; } } my $contents = $self->contents_as_pod; "=item $prefix".($contents?" $contents":'')."\n\n"; } sub as_text($) { my $self = shift; $self->_as_text(''); } ############################################################################## package Pod::begin; @Pod::begin::ISA = qw(Pod::_obj); sub initialize { $_[0]->type('unknown'); $_[0]->args(''); $_[0]->line(0); $_[0]->{_chunks} = []; } sub type($$) { return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type}; } sub args($$) { return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args}; } sub start($$) { return (@_ > 1) ? ($_[0]->{_start} = $_[1]) : $_[0]->{_start}; } sub addchunk($$) { push(@{$_[0]->{_chunks}},$_[1]); } sub contents($) { return @{shift->{_chunks}}; } sub as_pod($) { my $self = shift; "=begin ".$self->type.($self->args ? " ".$self->args:'')."\n\n". join("\n",@{$self->{_chunks}})."\n\n=end\n\n"; } sub as_text($) { my $self = shift; # TODO not really applicable, is it? ''; } ############################################################################## package Pod::for; @Pod::for::ISA = qw(Pod::_obj); sub initialize { $_[0]->type('unknown'); $_[0]->args(''); $_[0]->line(0); $_[0]->{_chunks} = []; } sub type($$) { return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type}; } sub args($$) { return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args}; } sub content($) { return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content}; } sub as_pod($) { my $self = shift; $self->_as_pod("=for ".$self->type.($self->args ? " ".$self->args:'')."\n". $self->content)."\n\n"; } sub as_text($) { my $self = shift; # TODO not really applicable, is it? ''; } ############################################################################## package Pod::_text; @Pod::_text::ISA = qw(Pod::_obj); sub nestlist($$) { return (@_ > 1) ? ($_[0]->{_nestlist} = $_[1]) : $_[0]->{_nestlist}; } ############################################################################## package Pod::string; @Pod::string::ISA = qw(Pod::_text); sub initialize { $_[0]->content(defined $_[1] ? $_[1] : ''); } sub content($$) { return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content}; } sub nbsp($$) { return (@_ > 1) ? ($_[0]->{_nbsp} = $_[1]) : $_[0]->{_nbsp}; } sub as_pod($) { my $self = shift; # TODO deal with entities $self->content; } sub as_text($) { my $self = shift; # TODO deal with entities $self->content; } ############################################################################## package Pod::bold; @Pod::bold::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'B<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::italic; @Pod::italic::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'I<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::code; @Pod::code::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'C<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::file; @Pod::file::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'F<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::nonbreaking; @Pod::nonbreaking::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'S<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::zero; @Pod::zero::ISA = qw(Pod::_text); sub as_pod($) { 'Z<>'; } sub as_text($) { ''; } ############################################################################## package Pod::idx; @Pod::idx::ISA = qw(Pod::_text); sub as_pod($) { my $self = shift; # TODO deal with entities 'X<'.$self->contents_as_pod.'>'; } sub as_text($) { shift->contents_as_text; } ############################################################################## package Pod::entity; @Pod::entity::ISA = qw(Pod::_text); # stolen from HTML::Entities my %ENTITIES = ( # Some normal chars that have special meaning in SGML context amp => '&', # ampersand 'gt' => '>', # greater than 'lt' => '<', # less than quot => '"', # double quote # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML AElig => 'Æ', # capital AE diphthong (ligature) Aacute => 'Á', # capital A, acute accent Acirc => 'Â', # capital A, circumflex accent Agrave => 'À', # capital A, grave accent Aring => 'Å', # capital A, ring Atilde => 'Ã', # capital A, tilde Auml => 'Ä', # capital A, dieresis or umlaut mark Ccedil => 'Ç', # capital C, cedilla ETH => 'Ð', # capital Eth, Icelandic Eacute => 'É', # capital E, acute accent Ecirc => 'Ê', # capital E, circumflex accent Egrave => 'È', # capital E, grave accent Euml => 'Ë', # capital E, dieresis or umlaut mark Iacute => 'Í', # capital I, acute accent Icirc => 'Î', # capital I, circumflex accent Igrave => 'Ì', # capital I, grave accent Iuml => 'Ï', # capital I, dieresis or umlaut mark Ntilde => 'Ñ', # capital N, tilde Oacute => 'Ó', # capital O, acute accent Ocirc => 'Ô', # capital O, circumflex accent Ograve => 'Ò', # capital O, grave accent Oslash => 'Ø', # capital O, slash Otilde => 'Õ', # capital O, tilde Ouml => 'Ö', # capital O, dieresis or umlaut mark THORN => 'Þ', # capital THORN, Icelandic Uacute => 'Ú', # capital U, acute accent Ucirc => 'Û', # capital U, circumflex accent Ugrave => 'Ù', # capital U, grave accent Uuml => 'Ü', # capital U, dieresis or umlaut mark Yacute => 'Ý', # capital Y, acute accent aacute => 'á', # small a, acute accent acirc => 'â', # small a, circumflex accent aelig => 'æ', # small ae diphthong (ligature) agrave => 'à', # small a, grave accent aring => 'å', # small a, ring atilde => 'ã', # small a, tilde auml => 'ä', # small a, dieresis or umlaut mark ccedil => 'ç', # small c, cedilla eacute => 'é', # small e, acute accent ecirc => 'ê', # small e, circumflex accent egrave => 'è', # small e, grave accent eth => 'ð', # small eth, Icelandic euml => 'ë', # small e, dieresis or umlaut mark iacute => 'í', # small i, acute accent icirc => 'î', # small i, circumflex accent igrave => 'ì', # small i, grave accent iuml => 'ï', # small i, dieresis or umlaut mark ntilde => 'ñ', # small n, tilde oacute => 'ó', # small o, acute accent ocirc => 'ô', # small o, circumflex accent ograve => 'ò', # small o, grave accent oslash => 'ø', # small o, slash otilde => 'õ', # small o, tilde ouml => 'ö', # small o, dieresis or umlaut mark szlig => 'ß', # small sharp s, German (sz ligature) thorn => 'þ', # small thorn, Icelandic uacute => 'ú', # small u, acute accent ucirc => 'û', # small u, circumflex accent ugrave => 'ù', # small u, grave accent uuml => 'ü', # small u, dieresis or umlaut mark yacute => 'ý', # small y, acute accent yuml => 'ÿ', # small y, dieresis or umlaut mark # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) copy => '©', # copyright sign reg => '®', # registered sign nbsp => "\240", # non breaking space # Additional ISO-8859/1 entities listed in rfc1866 (section 14) iexcl => '¡', cent => '¢', pound => '£', curren => '¤', yen => '¥', brvbar => '¦', sect => '§', uml => '¨', ordf => 'ª', laquo => '«', 'not' => '¬', # not is a keyword in perl shy => '', macr => '¯', deg => '°', plusmn => '±', sup1 => '¹', sup2 => '²', sup3 => '³', acute => '´', micro => 'µ', para => '¶', middot => '·', cedil => '¸', ordm => 'º', raquo => '»', frac14 => '¼', frac12 => '½', frac34 => '¾', iquest => '¿', 'times' => '×', # times is a keyword in perl divide => '÷', # some POD special entities verbar => '|', sol => '/' ); sub decode { my ($class,$str) = @_; my $ent = $class->new; $str =~ s/^\s+|\s+$//sg; my $value; if($str =~ /^(0x[0-9a-f]+)$/i) { # hexadecimal $value = hex($1); } elsif($str =~ /^(0[0-7]+)$/) { # octal $value = oct($1); } elsif($str =~ /^(\d+)$/) { # decimal $value = $1; } elsif($str =~ /^(\w+)$/i) { if(my $ent = $ENTITIES{$1}) { $value = ord($ent); } } return undef unless($value); $ent->value($value); $ent; } sub initialize { $_[0]->value(defined $_[1] ? $_[1] : ''); } sub value($$) { # value is number in ISO-8859-1 return (@_ > 1) ? ($_[0]->{_value} = $_[1]) : $_[0]->{_value}; } sub as_pod($) { my $self = shift; # TODO deal with nonbreaking space my $value = $self->value; my $chr = chr($value); my ($ent) = grep($_->[1] eq $chr, map { [ $_ => $ENTITIES{$_} ] } keys %ENTITIES); $ent = $ent ? $ent->[0] : $value; "E<$ent>"; } sub as_text($) { chr(shift->value); } ############################################################################## package Pod::link; @Pod::link::ISA = qw(Pod::_text); =head2 Pod::link B<Pod::link> is a class for manipulation of POD hyperlinks. Usage: my $link = Pod::link->new('alternative text|page/"section in page"'); The B<Pod::link> class is mainly designed to parse the contents of the C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the different parts of a POD hyperlink for further processing. It can also be used to construct hyperlinks. =over 4 =item Pod::link-E<gt>new() The B<new()> method can either be passed a set of key/value pairs or a single scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object of the class C<Pod::link> is returned. The value C<undef> indicates a failure, the error message is stored in C<$@>. =cut use Carp; # TODO node/item as marked-up text # ...but additional method node_text which is plain ISO-8859-1 sub initialize { my $self = shift; $self->{_line} ||= undef; $self->{_file} ||= undef; $self->{_page} ||= ''; $self->{_node} ||= ''; $self->{_alttext} ||= []; $self->{_type} ||= undef; if(defined $_[0]) { if(ref($_[0])) { # called with a list of parameters %$self = (%$self, %{$_[0]}); } } $self; } =item $link-E<gt>text() This method returns the textual representation of the hyperlink as above, but without markers (read only). Depending on the link type this is one of the following alternatives (the + and * denote the portions of the text that are marked up): the +perl+ manpage the *$|* entry in the +perlvar+ manpage the section on *OPTIONS* in the +perldoc+ manpage the section on *DESCRIPTION* elsewhere in this document =cut # The complete link's text sub as_text { my $self = shift; my @alttext = $self->alttext; if(@alttext) { return join('', map { $_->as_text } @alttext); } my $type = $self->{_type}; my $section = $self->{_node}; if($type eq 'url') { return $section; } my $page = $self->{_page}; my $page_ext = ''; $page =~ s/([(]\w*[)])$// && ($page_ext = $1); (!$section ? '' : $type eq 'item' ? "the $section entry" : "the section on $section" ) . ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" : ' elsewhere in this document'); } =item $link-E<gt>markup($str_method,$page_cb,$node_cb) Markup the link with appropriate strings. The textual objects within the link are expanded with the I<$method> method. =cut sub markup { my ($self,$method,$page_cb,$node_cb) = @_; my @alttext = $self->alttext; my $node = $self->{_node}; my $type = $self->{_type}; my $page = $self->{_page}; if(@alttext) { my ($obj,$callback); if($node) { $callback = $node_cb || $page_cb; } else { $callback = $page_cb || $node_cb; } return &$callback($self, @alttext); } if($type eq 'url') { my $callback = $node_cb || $page_cb; return &$callback($self, Pod::string->new($node)); } my $page_ext = ''; $page =~ s/([(]\w*[)])$// && ($page_ext = $1); unless($node) { # we must have a page my $pre = Pod::string->new("the "); my $post = Pod::string->new(" manpage"); my $callback = $page_cb || $node_cb; return($pre->$method,&$callback($self, Pod::string->new($page)),$post->$method); } else { my @conv; if($type eq 'item') { @conv = ( (Pod::string->new("the "))->$method, &$node_cb($self,Pod::string->new($node)), (Pod::string->new(" entry"))->$method ); } else { @conv = ( (Pod::string->new("the section on "))->$method, &$node_cb($self,Pod::string->new($node))); } if($page) { push(@conv, (Pod::string->new("the "))->$method, &$page_cb($self, Pod::string->new($page)), ($page_ext ? (Pod::string->new($page_ext))->$method : ()), (Pod::string->new(" manpage"))->$method); } else { push(@conv, (Pod::string->new(" elsewhere in this document"))->$method); } return @conv; } } =item $link-E<gt>file() =item $link-E<gt>line() Just simple slots for storing information about the line and the file the link was encountered in. Has to be filled in manually. =cut # The line in the file the link appears sub line { return (@_ > 1) ? ($_[0]->{_line} = $_[1]) : $_[0]->{_line}; } # The POD file name the link appears in sub file { return (@_ > 1) ? ($_[0]->{_file} = $_[1]) : $_[0]->{_file}; } =item $link-E<gt>page() This method sets or returns the POD page this link points to. =cut # The POD page the link appears on sub page { if (@_ > 1) { $_[0]->{_page} = $_[1]; } $_[0]->{_page}; } =item $link-E<gt>node() As above, but the destination node text of the link. =cut # The link destination sub node { if (@_ > 1) { $_[0]->{_node} = $_[1]; } $_[0]->{_node}; } =item $link-E<gt>alttext() Sets or returns an alternative text specified in the link. =cut # Potential alternative text sub alttext { my $self = shift; if (@_) { $self->{_alttext} = [ @_ ]; } @{$self->{_alttext}}; } =item $link-E<gt>type() The node type, either C<section> or C<item>. As an unofficial type, there is also C<url>, derived from e.g. C<LE<lt>http://perl.comE<gt>> =cut # The type: item or headn sub type { return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; } =item $link-E<gt>as_pod() Returns the link as contents of C<LE<lt>E<gt>>. =back =cut # The link itself sub as_pod { my $self = shift; my $link = $self->page() || ''; my $node = $self->node(); if($node) { $node =~ s/\|/E<verbar>/g; $node =~ s:/:E<sol>:g; if($self->type() eq 'section') { $link .= ($link ? '/' : '') . qq{"$node"}; } elsif($self->type() eq 'url') { $link = $self->node(); # TODO to escape or not to escape? } else { # item $link .= '/' . $node; } } my @txt = $self->alttext(); if(@txt) { my $text = join('', map { $_->as_pod } @txt); $text =~ s/\|/E<verbar>/g; $text =~ s:/:E<sol>:g; $link = "$text|$link"; } "L<$link>"; } ############################################################################## 1;