Re: [p5pod] [ANNOUNCE] Pod::StdParser, Pod::Tests proof of concept

Marek Rouchal DAT CAD HW Tel 25849 Tue, 05 Sep 2000 23:59:55 -0700

On Tue, 5 Sep 2000, Barrie Slaymaker wrote:

BS>Brad Appleton wrote:
BS>> 
BS>> Marek has been doing similar work on an experimental
BS>> Pod::Compiler module that sounds suspiciously like your
BS>> Pod::StdParser. Definitely make sure you look at each
BS>> other's code before going too much further.
BS>
BS>I haven't seen it published anywhere, but his previous description sounds very much
BS>DOMish.  This isn't DOMish: it doesn't build a tree of objects, it is a lot like
BS>Pod::Parser in that it generates events during parsing and provides an OO 
scaffolding
BS>for handling those events (by overloading at this time, perhaps by callback later).

I see your point, but if you think of complex converters like HTML,
FrameMaker (MIF) or LaTeX, then it is *extremely* handy to have a tree of
objects, so you always have the full information about nesting, sisters
etc easily accessible. And for hyperlinking you need two passes anyway,
and thanks to Storable you can conveniently store the whole tree and run
the second pass over it as soon as all hyperlink destinations have been
computed. I don't want to go into the details, but I have certainly a lot
of pros for an OO approach.

I haven't coded Pod::Compiler into my Marek::Pod::HTML yet, but I expect a
dramatic increase in performance when converting e.g. the whole Perl
distribution PODs.

There are two differing requirements here: On the one hand, we need a
fast! parser for online viewing, on the other a base framework for complex
converters to avoid coding the same all over again.

BS>If Marek will point me to his code, I'll gladly peruse it.

I've included my 0.01 of Pod::Compiler and a sample script (sorry, no
module package) for you to have a look at. Yes, there are a lot of
TODOs.

BS>Do you have any feedback on Pod::StdParser & Pod::Tests in the meanwhile?

I've had a brief look: Yes, it is a sensible extension of Pod::Parser, but
it uses callbacks rather than creating a tree. The functionality of
Pod::StdParser could quite easily be included in Pod::Compiler, IMHO.

The big question is: Could the tree-approach satisfy all requirements or
do we need separate, highly optimized converters for every single output
format?

-Marek

#!/opt/perl_5.6.0/bin/perl -w



use strict;



use Marek::Pod::Compiler qw(pod_compile);



my $root = pod_compile(shift @ARGV, \*STDOUT);



$root->walk_down({ callback => \&_disp, _depth => 0 });



my $file = "/tmp/pod$$";

print "storing to $file\n";

$root->store($file);



print "retrieving $file\n";

my $newroot = Pod::root->read($file);



print $newroot->as_pod,"\n";



print "\n\nNODES:\n";

my %nodes = $newroot->nodes();



foreach(keys %nodes) {

  print "  $_  $nodes{$_}\n";

}



sub _disp

{

  my ($node,$href) = @_;

  my $indent = '  ' x $href->{_depth};

  print $indent,ref($node),"\n";

  1;

}

# -*- perl -*-
#############################################################################
# Pod/Compiler.pm -- compiles POD into an object tree
#
# Copyright (C) 2000 by Marek Rouchal. All rights reserved.
# This package is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#############################################################################

package Marek::Pod::Compiler;

=head1 NAME

Marek::Pod::Compiler - compile POD into an object tree

=head1 SYNOPSIS

  use Marek::Pod::Compiler;

=head1 DESCRIPTION

THIS IS PRELIMINARY SOFTWARE! The Marek:: namespace is strictly
preliminary until a regular place in CPAN is found.

=cut

use strict;

require Exporter;
require Pod::Parser;
require Tree::DAG_Node;
#$Tree::DAG_Node::Debug = 1;
require Storable;

$Marek::Pod::Compiler::VERSION = '0.01';
@Marek::Pod::Compiler::ISA = qw(Exporter Pod::Parser);

@Marek::Pod::Compiler::EXPORT = qw();
@Marek::Pod::Compiler::EXPORT_OK = qw(pod_compile);

##############################################################################

sub pod_compile
{
  my ($infile) = @_;

  my $compiler = Marek::Pod::Compiler->new();
  $compiler->parse_from_file($infile,\*STDOUT);
  $compiler->root();
}

##-------------------------------
## Method definitions begin here
##-------------------------------

# set up a new object
sub new {
    my $this = shift;
    my $class = ref($this) || $this;
    my %params = @_;
    my $self = {%params};
    bless $self, $class;
    $self->initialize();
    return $self;
}

# initalize, set defaults
sub initialize {
    my $self = shift;

    ## Options
    # the POD name
    $self->{name} ||= '';

    # no parser errors here, we've seen them in the first pass
    $self->SUPER::errorsub(sub { return 1; });
}

=item name()

Set/retrieve the C<-name> property, i.e. the canonical Pod name
(e.g. C<Pod::HTML>).

=cut

# set and/or retrieve canonical name of POD
sub name
{
  return (@_ > 1) ? ($_[0]->{name} = $_[1]) : $_[0]->{name};
}

sub root
{
  return (@_ > 1) ? ($_[0]->{_root} = $_[1]) : $_[0]->{_root};
}

sub current
{
  return (@_ > 1) ? ($_[0]->{_current} = $_[1]) : $_[0]->{_current};
}

## overrides for Pod::Parser

# things to do at start of POD
sub begin_input {
    my $self = shift;
    $self->current($self->root(Pod::root->new()));
}

# things to do at end of POD
sub end_pod {
  my $self = shift;
  1;
}

# expand a POD command
sub command {
  my ($self, $cmd, $paragraph, $line, $pod_para) = @_;
  my ($file) = $pod_para->file_line;

  # Check the command syntax
  $paragraph =~ s/\s+$//s;

  if($cmd eq 'over') {
    # check argument
    my $indent = 4; # default
    if($paragraph =~ /^\s*(\d+)$/s) {
      $indent = $1;
    } else {
      warn "Not a numeric argument for =over: '$paragraph'\n";
    }
    # start a new list
    my $list = Pod::list->new;
    $list->indent($indent);
    $list->start($line); # remember start line of list
    $self->current->add_daughter($list);
    $self->current($list); # make list current context
  }

  elsif($cmd eq 'item') {
    # are we in a list?
    my $current = $self->current;
    if($current->isa('Pod::begin')) {
      $current = $current->mother;
    }
    unless($current->isa('Pod::list')) {
      warn "=item without previous =over\n";
      # auto-open a list
      my $list = Pod::list->new;
      $list->start($line);
      $list->autoopen(1);
      $self->current->add_daughter($list);
      $self->current($list);
    }
    my $list = $self->current;
    # check whether the previous item had some contents
    my $last_item = ($list->daughters)[-1];
    if($last_item && $last_item->isa('Pod::item') && !($last_item->daughters)) {
      warn "previous =item has no contents\n";
    }
    # TODO non-items in =over/=back

    # check argument
    # TODO killing of first characters according to list type?!?
    my $type = 'definition';
    if($paragraph =~ /\S/s) {
      if($paragraph =~ s/^\s*[*](\s+|$)//s) {
        $type = 'bullet';
      }
      elsif($paragraph =~ s/^\s*([1-9]\d*)\.?(\s+|$)//s) {
        # TODO verify numbering
        $type = "number";
      }
    } else {
      warn "No argument for =item\n";
    }
    my $first = $list->type();
    unless($first) {
      $list->type($type);
    }
    elsif($first ne $type) {
      warn "=item type mismatch ('$first' vs. '$type')\n";
      $list->type($type) if($type =~ /^definition/ ||
        ($type =~ /^number/ && $first =~ /^bullet/));
    }

    # add this item
    my $item = Pod::item->new;
    $item->add_daughters($self->interpolate($paragraph, $line, $file));
    $list->add_daughter($item);
    if($first =~ /^definition/ && $item->contents_as_text !~ /\S/s) {
      warn "no text content in =item\n";
    }
  }

  elsif($cmd eq 'back') {
    # check if we have an open list
    unless($self->current->isa('Pod::list')) {
      warn "=back without previous =over\n";
    }
    else {
      # check for spurious characters
      if($paragraph =~ /\S/s) {
        warn "Spurious character(s) after =back\n";
      }
      # close list
      my $list = $self->current;
      $self->current($list->mother);

      # check for empty lists
      if(!($list->daughters)) {
        warn "No contents in =over/=back list\n";
      }
    } # end in_list
  }

  elsif($cmd =~ /^head([1-9]\d*)/) {
    my $level = $1;
    # TODO check whether the previous =head section had some contents
    # - keep track of current head contents and level
    #   and compare with this one
    #   "empty section in previous paragraph"

    # check if there is an open list
    my $current = $self->current;
    while($current->isa('Pod::list')) {
      unless($current->autoopen) {
        warn "=over on line ". $current->start() .
          " without closing =back (at $cmd)\n";
      }
      $current = $self->current($current->mother);
    }
    # check contents
    my $head = Pod::head->new($level);
    $head->add_daughters($self->interpolate($paragraph,$line,$file));
    if(length($head->contents_as_text)) {
      $current->add_daughter($head);
      # TODO remember current heading
    } else {
      warn "empty =$cmd\n";
    }
  }

  elsif($cmd eq 'begin') {
    if($self->current->isa('Pod::begin')) {
      # already have a begin
      warn "Nested =begin's (first at line TODO start)\n";
    }
    else {
      # check for argument
      my ($type,$args);
      if($paragraph =~ /^\s*(\S+)\s*(.*)/) {
        ($type,$args) = ($1,$2);
      } else {
        warn "No argument for =begin\n";
      }
      # remember the =begin
      my $begin = Pod::begin->new;
      $begin->type($type);
      $begin->args($args);
      $begin->line($line);
      $self->current->add_daughter($begin);
      $self->current($begin);
    }
  }

  elsif($cmd eq 'end') {
    if($self->current->isa('Pod::begin')) {
      # close the existing =begin
      $self->current($self->current->mother);
      # check for spurious characters
      # the closing argument is optional
      # TODO check opening/closing types
      if($paragraph =~ /^\s*(\S+)\s*(.*)$/ && $2) {
        warn  "Spurious character(s) after '=end $1'\n";
      }
    }
    else {
      # don't have a matching =begin
      warn "=end without =begin\n";
    }
  }

  elsif($cmd eq 'for') {
    my ($type,$args);
    if($paragraph =~ s/^\s*(\S+)[ \t]*([^\n]*)\n*//s) {
      ($type,$args) = ($1,$2);
      $args =~ s/\s+$//s;
    } else {
      warn "=for without formatter specification\n";
      $paragraph = ''; # do not expand paragraph below
    }
    if($paragraph =~ /\S/s) {
      my $forp = Pod::for->new;
      $forp->type($type);
      $forp->args($args);
      $forp->content($paragraph);
      # TODO check if in list?!?
      $self->current->add_daughter($forp);
    }
  }

  elsif($cmd =~ /^(pod|cut)$/) {
    # check for argument
    if($paragraph =~ /\S/) {
      warn "Spurious text after =$cmd\n";
    }
  }

  else {
    warn "Invalid pod command '$cmd'\n";
  }
}

# process a verbatim paragraph
sub verbatim {
  my ($self, $paragraph, $line, $pod_para) = @_;
  my ($file) = $pod_para->file_line;

  # strip trailing whitespace
  $paragraph =~ s/\s+$//s;

  unless($paragraph =~ /\S/) {
    # just an empty line
    return 1;
  }

  # make verbatim par
  my $verbobj;
  my $lastobj = ($self->current->daughters)[-1];
  if($lastobj && $lastobj->isa('Pod::verbatim')) {
    # recycle previous verbatim paragraph
    $verbobj = $lastobj;
    $verbobj->addline('');
  } else {
    $verbobj = Pod::verbatim->new;
    $self->current->add_daughter($verbobj);
  }
  foreach(split(/[\r\n]+/, $paragraph)) {
    s/\s*$//s;
    $verbobj->addline($_);
  }
  1;
}

# a regular text paragraph
sub textblock {
  my ($self, $paragraph, $line, $pod_para) = @_;
  my ($file) = $pod_para->file_line;

  $paragraph =~ s/\s+$//s;
  my $current = $self->current;
  # TODO check context (list, begin)
  if($current->isa('Pod::begin')) {
    $current->addchunk($paragraph);
  } else {
    my $par = Pod::paragraph->new;
    $par->add_daughters($self->interpolate($paragraph,$line,$file));
    # check for non-empty content
    if($par->contents_as_text =~ /\S/) {
      $current->add_daughter($par);
    } else {
      warn "no text contents in paragraph\n";
    }
  }
}

# expand a POD text string
sub interpolate
{
  my ($self,$paragraph,$line,$file) = @_;
  # expand the interior sequences
  $self->_expand_ptree($self->parse_text($paragraph,$line),
    $line, $file, '');
}

sub _expand_ptree
{
  my ($self,$ptree,$line,$file,$nestlist) = @_;
  local($_);
  my @obj = ();
  # process each node in the parse tree
  foreach(@$ptree) {
    # regular text chunk
    unless(ref) {
      my $chunk = $_;

      # TODO do magic linebreaking
      #while($chunk =~ s/^([^\n]*)\n([ \t]+)//) {
      #    my ($line,$indent) = ($1,$2);
      #}

      # TODO escape whitespace if in S<>
      #if($nestlist =~ /S/) {
      #}

      my $str = Pod::string->new($chunk);
      $str->nbsp(1) if($nestlist =~ /S/);
      $str->nestlist($nestlist);
      push(@obj, $str);
      next; # finished this chunk
    }

    # have an interior sequence
    my $cmd = $_->cmd_name();
    my $contents = $_->parse_tree();
    my $file;
    ($file,$line) = $_->file_line();

    # an entity
    if($cmd eq 'E') {
      my $str = $contents->raw_text();
      my $ent = Pod::entity->decode($str);
      # TODO recyle string if "normal" ascii character
      unless($ent) {
        warn "unrecognized entity '$str'\n";
      } else {
        push(@obj, $ent);
      }
    }

    # a hyperlink
    elsif($cmd eq 'L') {
      # try to parse the hyperlink
      $_ = $contents->raw_text();

      # collapse newlines with whitespace
      if(s/\s*\n+\s*/ /g) {
        warn "collapsing newlines to blanks\n";
      }
      # strip leading/trailing whitespace
      if(s/^[\s\n]+|[\s\n]+$//) {
        warn "ignoring leading/trailing whitespace in link\n";
      }
      unless(length($_)) {
        warn "ERROR: empty link\n";
        next;
      }

      # Check for different possibilities. This is tedious and error-prone
      # we match all possibilities (alttext, page, section/item)
      #warn "DEBUG: link=$_\n";

      # only page
      # problem: a lot of people use (), or (1) or the like to indicate
      # man page sections. But this collides with L<func()> that is supposed
      # to point to an internal funtion...
      my $page_rx = '[\w.]+(?:::[\w.]+)*(?:[(](?:\d\w*|)[)]|)';
      my $url_rx = '(?:http|ftp|mailto|news):.+';

      my ($alttext,$page,$type,$node);

      if(m!^($page_rx)$!o) {
        $page = $1;
        $type = 'page';
      }
      # alttext, page and "section"
      elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
        ($alttext, $page, $node) = ($1, $2, $3);
        $type = 'section';
      }
      # alttext and page
      elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
        ($alttext, $page) = ($1, $2);
        $type = 'page';
      }
      # alttext and "section"
      elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
        ($alttext, $node) = ($1,$2);
        $type = 'section';
      }
      # page and "section"
      elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
        ($page, $node) = ($1, $2);
        $type = 'section';
      }
      # page and item
      elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
        ($page, $node) = ($1, $2);
        $type = 'item';
      }
      # only "section"
      elsif(m!^/?"(.+)"$!) {
        $node = $1;
        $type = 'section';
      }
      # only item
      elsif(m!^\s*/(.+)$!) {
        $node = $1;
        $type = 'item';
      }
      # non-standard: URL
      elsif(m!^($url_rx)$!io) {
        $node = $1;
        $type = 'url';
      }
      # alttext, page and item
      elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
        ($alttext, $page, $node) = ($1, $2, $3);
        $type = 'item';
      }
      # alttext and item
      elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
        ($alttext, $node) = ($1,$2);
      }
      # nonstandard: alttext and url
      elsif(m!^(.*?)\s*[|]\s*($url_rx)$!o) {
        ($alttext, $node) = ($1,$2);
        $type = 'url';
      }
      # must be an item or a "malformed" section (without "")
      else {
        $node = $_;
        $type = 'item';
      }

      # empty alternative text expands to node name or page name
      if(defined $alttext) {
        if(!length($alttext)) {
          $alttext = $node | $page;
        }
        elsif($alttext =~ m:[|/]:) {
          warn "alternative text '$alttext' contains non-escaped | or /\n";
        }
      }

      if(defined $page && $page =~ /[(]\w*[)]$/) {
        warn "(section) in '$page' deprecated\n";
      }
      if(defined $node && $node =~ m:[|/]:) {
        warn "node '$node' contains non-escaped | or /\n";
      }
      my $link = Pod::link->new;
      if(defined $alttext) {
        my @txt = $self->interpolate($alttext,$line,$file);
        $link->alttext(@txt);
      }
      $link->page($page) if($page);
      if($node) {
        # collapse markup
        my $txt = join('', map { $_->as_text } $self->interpolate($node,$line,$file));
        $txt =~ s/\s+/ /gs; # collapse whitespace
        $txt =~ s/^\s+|\s+$//gs; # trim whitespace
        $link->node($node);
      }
      $link->type($type);
      push(@obj, $link);
    }

    # bold text
    elsif($cmd eq 'B') {
      if($nestlist =~ /B/) {
        warn "...B<...B<...>...>...\n";
      }
      my $bold = Pod::bold->new;
      $bold->nestlist($nestlist);
      $bold->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'B'));
      push(@obj, $bold);
    }

    # code text
    elsif($cmd eq 'C') {
      if($nestlist =~ /C/) {
        warn "...C<...C<...>...>...\n";
      }
      my $code = Pod::code->new;
      $code->nestlist($nestlist);
      $code->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'C'));
      push(@obj, $code);
    }

    # file text
    elsif($cmd eq 'F') {
      if($nestlist =~ /F/) {
        warn "...F<...F<...>...>...\n";
      }
      my $file = Pod::file->new;
      $file->nestlist($nestlist);
      $file->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'F'));
      push(@obj, $file);
    }

    # italic text
    elsif($cmd eq 'I') {
      # TODO I<...I<...>...> should be expanded to
      # <I>...</I>...<I>...</I> - according to Achim Bohnet
      if($nestlist =~ /I/) {
        warn "...I<...I<...>...>...\n";
      }
      my $italic = Pod::italic->new;
      $italic->nestlist($nestlist);
      
$italic->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'I'));
      push(@obj, $italic);
    }

    # non-breakable space
    elsif($cmd eq 'S') {
      # note: sets also special flag in strings
      if($nestlist =~ /S/) {
        warn "...S<...S<...>...>...\n";
      }
      my $nbsp = Pod::nonbreaking->new;
      $nbsp->nestlist($nestlist);
      $nbsp->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'S'));
      push(@obj, $nbsp);
    }

    # zero-size element
    elsif($cmd eq 'Z') {
      # TODO check that it has no content
      my $zero = Pod::zero->new;
      $zero->nestlist($nestlist);
      push(@obj, $zero);
    }

    # custom index entries
    # TODO these should run also throgh Pod::Checker and result in
    # valid L<...> destinations
    elsif($cmd eq 'X') {
      # tag this with a unique identifier and add it to the index
      if($nestlist =~ /X/) {
        warn "...X<...X<...>...>...\n";
      }
      my $idx = Pod::idx->new;
      $idx->nestlist($nestlist);
      $idx->add_daughters($self->_expand_ptree($contents,$line,$file,$nestlist.'X'));
      push(@obj, $idx);
    }

    else {
      # ignore everything else
      warn "Invalid command '$cmd'\n";
    }
  } # end for ptree
  @obj;
}

=head1 SEE ALSO

L<Pod::Checker>, L<Pod::Parser>, L<Pod::Find>,
L<pod2man>, L<pod2text>, L<Pod::Man>

=head1 AUTHOR

Marek Rouchal E<lt>[EMAIL PROTECTED]<gt>

=head1 HISTORY

A big deal of this code has been recycled from a variety of existing
Pod converters, e.g. by Tom Christiansen and Russ Allbery. A lot of
ideas came from Nick Ing-Simmons' B<PodToHtml>, e.g. the usage of the
B<HTML::Element> module and the B<HTML::FormatPS> module.
Without the B<Pod::Parser> module by Brad Appleton and the
B<HTML::Element> module by Gisle Aas this module would not exist.

=cut

##############################################################################

package Pod::_obj;

@Pod::_obj::ISA = qw(Tree::DAG_Node);

sub new
{
    my $this = shift;
    my $class = ref($this) || $this;
    my @params = @_;
    my $self = $class->SUPER::new;
    bless $self, $class;
    $self->initialize(@params);
    return $self;
}

sub initialize
{
  1;
}

sub line
{
  return (@_ > 1) ? ($_[0]->{_line} = $_[1]) : $_[0]->{_line};
}

sub contents_as_pod($)
{
  $_[0]->_as_pod;
}

sub _as_pod($;$$)
{
  my ($self,$first,$last) = @_;
  my $str = defined $first ? $first : '';
  foreach($self->daughters) {
    $str .= $_->as_pod;
  }
  $str .= defined $last ? $last : '';
  $str;
}

sub contents_as_text($)
{
  my $text = $_[0]->_as_text;
  $text =~ s/^\s+|\s+$//sg;
  $text =~ s/\s+/ /sg;
  $text;
}

sub _as_text($$)
{
  my ($self,$first,$last) = @_;
  my $text = $first || '';
  foreach($self->daughters) {
    $text .= $_->as_text;
  }
  $text .= $last || '';
  $text;
}

##############################################################################

package Pod::root;

@Pod::root::ISA = qw(Pod::_obj Storable);

sub initialize
{
  1;
}

sub as_pod($)
{
  shift->_as_pod()."=cut\n";
}

sub as_text($)
{
  shift->_as_text('');
}

sub read($$)
{
  my ($pseudoclass,$file) = @_;
  my $obj = Storable::retrieve($file);
  $obj;
}

sub nodes
{
  my $self = shift;
  my %nodes = ();
  $self->walk_down({ callback => \&_get_nodes, _depth => 0, _nodes => \%nodes });
  %nodes;
}

sub _get_nodes
{
  my ($node,$href) = @_;
  if($node->isa("Pod::head")) {
    my $txt = $node->as_text;
    $href->{_nodes}->{$txt} = 1;
    return 0;
  }
  elsif($node->isa("Pod::item")) {
    my $txt = $node->as_text;
    $href->{_nodes}->{$txt} = 2;
    return 0;
  }
  elsif($node->isa("Pod::idx")) {
    my $txt = $node->as_text;
    $href->{_nodes}->{$txt} = 3;
    return 0;
  }
  1;
}

##############################################################################

package Pod::paragraph;

@Pod::paragraph::ISA = qw(Pod::_obj);

sub initialize
{
  1;
}

sub as_pod($)
{
  my $self = shift;
  $self->_as_pod($self->my_daughter_index?'':"=pod\n\n","\n\n");
}

sub as_text($)
{
  my $self = shift;
  $self->_as_text('');
}

##############################################################################

package Pod::verbatim;

@Pod::verbatim::ISA = qw(Pod::_obj);

sub initialize
{
  shift->{_content} = [];
}

sub addline($$)
{
  push(@{$_[0]->{_content}}, $_[1]);
}

sub content($@)
{
  my $self = shift;
  if(@_) {
    if(defined $_[0]) {
      @{$self->{_content}} = @_;
    } else {
      $self->{_content} = [];
    }
  }
  @{$self->{_content}};
}

sub as_pod($)
{
  my $self = shift;
  ($self->my_daughter_index?'':"=pod\n\n").
  join("\n",$self->content)."\n\n";
}

sub as_text($)
{
  my $self = shift;
  join("\n",$self->content);
}

##############################################################################

package Pod::head;

@Pod::head::ISA = qw(Pod::_obj);

sub initialize
{
  $_[0]->level($_[1] || 1);
}

sub level($$)
{
  return (@_ > 1) ? ($_[0]->{_level} = $_[1]) : $_[0]->{_level};
}

sub as_pod($)
{
  my $self = shift;
  $self->_as_pod("=head".$self->level." ","\n\n");
}

sub as_text($)
{
  my $self = shift;
  $self->_as_text('');
}

##############################################################################

package Pod::list;

@Pod::list::ISA = qw(Pod::_obj);

sub initialize
{
  $_[0]->autoopen(0);
  $_[0]->type('');
  $_[0]->indent(4);
}

sub autoopen($$)
{
  return (@_ > 1) ? ($_[0]->{_auto} = $_[1]) : $_[0]->{_auto};
}

sub indent($$)
{
  return (@_ > 1) ? ($_[0]->{_indent} = $_[1]) : $_[0]->{_indent};
}

sub start($$)
{
  return (@_ > 1) ? ($_[0]->{_start} = $_[1]) : $_[0]->{_start};
}

sub type($$)
{
  return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}

sub as_pod($)
{
  my $self = shift;
  $self->_as_pod("=over ".$self->indent."\n\n","=back\n\n")
}

sub as_text($)
{
  my $self = shift;
  $self->_as_text('');
}

##############################################################################

package Pod::item;

@Pod::item::ISA = qw(Pod::_obj);

sub as_pod($)
{
  my $self = shift;
  my $mum = $self->mother;
  my $prefix = '';
  if($mum) {
    my $type = $mum->type;
    if($type =~ /^bullet/) {
      $prefix = '*';
    }
    elsif($type =~ /^number/) {
      $prefix = ($self->my_daughter_index+1).'.';
    }
  }
  my $contents = $self->contents_as_pod;
  "=item $prefix".($contents?" $contents":'')."\n\n";
}

sub as_text($)
{
  my $self = shift;
  $self->_as_text('');
}

##############################################################################

package Pod::begin;

@Pod::begin::ISA = qw(Pod::_obj);

sub initialize
{
  $_[0]->type('unknown');
  $_[0]->args('');
  $_[0]->line(0);
  $_[0]->{_chunks} = [];
}

sub type($$)
{
  return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}

sub args($$)
{
  return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args};
}

sub start($$)
{
  return (@_ > 1) ? ($_[0]->{_start} = $_[1]) : $_[0]->{_start};
}

sub addchunk($$)
{
  push(@{$_[0]->{_chunks}},$_[1]);
}

sub contents($)
{
  return @{shift->{_chunks}};
}

sub as_pod($)
{
  my $self = shift;
  "=begin ".$self->type.($self->args ? " ".$self->args:'')."\n\n".
  join("\n",@{$self->{_chunks}})."\n\n=end\n\n";
}

sub as_text($)
{
  my $self = shift;
  # TODO not really applicable, is it?
  '';
}

##############################################################################

package Pod::for;

@Pod::for::ISA = qw(Pod::_obj);

sub initialize
{
  $_[0]->type('unknown');
  $_[0]->args('');
  $_[0]->line(0);
  $_[0]->{_chunks} = [];
}

sub type($$)
{
  return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}

sub args($$)
{
  return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args};
}

sub content($)
{
  return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content};
}

sub as_pod($)
{
  my $self = shift;
  $self->_as_pod("=for ".$self->type.($self->args ? " ".$self->args:'')."\n".
    $self->content)."\n\n";
}

sub as_text($)
{
  my $self = shift;
  # TODO not really applicable, is it?
  '';
}

##############################################################################

package Pod::_text;

@Pod::_text::ISA = qw(Pod::_obj);

sub nestlist($$)
{
  return (@_ > 1) ? ($_[0]->{_nestlist} = $_[1]) : $_[0]->{_nestlist};
}

##############################################################################

package Pod::string;

@Pod::string::ISA = qw(Pod::_text);

sub initialize
{
  $_[0]->content(defined $_[1] ? $_[1] : '');
}

sub content($$)
{
  return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content};
}

sub nbsp($$)
{
  return (@_ > 1) ? ($_[0]->{_nbsp} = $_[1]) : $_[0]->{_nbsp};
}

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  $self->content;
}

sub as_text($)
{
  my $self = shift;
  # TODO deal with entities
  $self->content;
}

##############################################################################

package Pod::bold;

@Pod::bold::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'B<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::italic;

@Pod::italic::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'I<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::code;

@Pod::code::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'C<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::file;

@Pod::file::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'F<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::nonbreaking;

@Pod::nonbreaking::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'S<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::zero;

@Pod::zero::ISA = qw(Pod::_text);

sub as_pod($)
{
  'Z<>';
}

sub as_text($)
{
  '';
}

##############################################################################

package Pod::idx;

@Pod::idx::ISA = qw(Pod::_text);

sub as_pod($)
{
  my $self = shift;
  # TODO deal with entities
  'X<'.$self->contents_as_pod.'>';
}

sub as_text($)
{
  shift->contents_as_text;
}

##############################################################################

package Pod::entity;

@Pod::entity::ISA = qw(Pod::_text);

# stolen from HTML::Entities
my %ENTITIES = (
 # Some normal chars that have special meaning in SGML context
 amp    => '&',  # ampersand 
'gt'    => '>',  # greater than
'lt'    => '<',  # less than
 quot   => '"',  # double quote

 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
 AElig  => '�',  # capital AE diphthong (ligature)
 Aacute => '�',  # capital A, acute accent
 Acirc  => '�',  # capital A, circumflex accent
 Agrave => '�',  # capital A, grave accent
 Aring  => '�',  # capital A, ring
 Atilde => '�',  # capital A, tilde
 Auml   => '�',  # capital A, dieresis or umlaut mark
 Ccedil => '�',  # capital C, cedilla
 ETH    => '�',  # capital Eth, Icelandic
 Eacute => '�',  # capital E, acute accent
 Ecirc  => '�',  # capital E, circumflex accent
 Egrave => '�',  # capital E, grave accent
 Euml   => '�',  # capital E, dieresis or umlaut mark
 Iacute => '�',  # capital I, acute accent
 Icirc  => '�',  # capital I, circumflex accent
 Igrave => '�',  # capital I, grave accent
 Iuml   => '�',  # capital I, dieresis or umlaut mark
 Ntilde => '�',  # capital N, tilde
 Oacute => '�',  # capital O, acute accent
 Ocirc  => '�',  # capital O, circumflex accent
 Ograve => '�',  # capital O, grave accent
 Oslash => '�',  # capital O, slash
 Otilde => '�',  # capital O, tilde
 Ouml   => '�',  # capital O, dieresis or umlaut mark
 THORN  => '�',  # capital THORN, Icelandic
 Uacute => '�',  # capital U, acute accent
 Ucirc  => '�',  # capital U, circumflex accent
 Ugrave => '�',  # capital U, grave accent
 Uuml   => '�',  # capital U, dieresis or umlaut mark
 Yacute => '�',  # capital Y, acute accent
 aacute => '�',  # small a, acute accent
 acirc  => '�',  # small a, circumflex accent
 aelig  => '�',  # small ae diphthong (ligature)
 agrave => '�',  # small a, grave accent
 aring  => '�',  # small a, ring
 atilde => '�',  # small a, tilde
 auml   => '�',  # small a, dieresis or umlaut mark
 ccedil => '�',  # small c, cedilla
 eacute => '�',  # small e, acute accent
 ecirc  => '�',  # small e, circumflex accent
 egrave => '�',  # small e, grave accent
 eth    => '�',  # small eth, Icelandic
 euml   => '�',  # small e, dieresis or umlaut mark
 iacute => '�',  # small i, acute accent
 icirc  => '�',  # small i, circumflex accent
 igrave => '�',  # small i, grave accent
 iuml   => '�',  # small i, dieresis or umlaut mark
 ntilde => '�',  # small n, tilde
 oacute => '�',  # small o, acute accent
 ocirc  => '�',  # small o, circumflex accent
 ograve => '�',  # small o, grave accent
 oslash => '�',  # small o, slash
 otilde => '�',  # small o, tilde
 ouml   => '�',  # small o, dieresis or umlaut mark
 szlig  => '�',  # small sharp s, German (sz ligature)
 thorn  => '�',  # small thorn, Icelandic
 uacute => '�',  # small u, acute accent
 ucirc  => '�',  # small u, circumflex accent
 ugrave => '�',  # small u, grave accent
 uuml   => '�',  # small u, dieresis or umlaut mark
 yacute => '�',  # small y, acute accent
 yuml   => '�',  # small y, dieresis or umlaut mark

 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
 copy   => '�',  # copyright sign
 reg    => '�',  # registered sign
 nbsp   => "\240", # non breaking space

 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
 iexcl  => '�',
 cent   => '�',
 pound  => '�',
 curren => '�',
 yen    => '�',
 brvbar => '�',
 sect   => '�',
 uml    => '�',
 ordf   => '�',
 laquo  => '�',
'not'   => '�',    # not is a keyword in perl
 shy    => '�',
 macr   => '�',
 deg    => '�',
 plusmn => '�',
 sup1   => '�',
 sup2   => '�',
 sup3   => '�',
 acute  => '�',
 micro  => '�',
 para   => '�',
 middot => '�',
 cedil  => '�',
 ordm   => '�',
 raquo  => '�',
 frac14 => '�',
 frac12 => '�',
 frac34 => '�',
 iquest => '�',
'times' => '�',    # times is a keyword in perl
 divide => '�',

# some POD special entities
 verbar => '|',
 sol => '/'
);

sub decode
{
  my ($class,$str) = @_;
  my $ent = $class->new;
  $str =~ s/^\s+|\s+$//sg;
  my $value;
  if($str =~ /^(0x[0-9a-f]+)$/i) {
    # hexadecimal
    $value = hex($1);
  }
  elsif($str =~ /^(0[0-7]+)$/) {
    # octal
    $value = oct($1);
  }
  elsif($str =~ /^(\d+)$/) {
    # decimal
    $value = $1;
  }
  elsif($str =~ /^(\w+)$/i) {
    if(my $ent = $ENTITIES{$1}) {
      $value = ord($ent);
    }
  }
  return undef unless($value);
  $ent->value($value);
  $ent;
}

sub initialize
{
  $_[0]->value(defined $_[1] ? $_[1] : '');
}

sub value($$)
{
  # value is number in ISO-8859-1
  return (@_ > 1) ? ($_[0]->{_value} = $_[1]) : $_[0]->{_value};
}

sub as_pod($)
{
  my $self = shift;
  # TODO deal with nonbreaking space
  my $value = $self->value;
  my $chr = chr($value);
  my ($ent) = grep($_->[1] eq $chr, map { [ $_ => $ENTITIES{$_} ] }
    keys %ENTITIES);
  $ent = $ent ? $ent->[0] : $value;
  "E<$ent>";
}

sub as_text($)
{
  chr(shift->value);
}

##############################################################################

package Pod::link;

@Pod::link::ISA = qw(Pod::_text);

=head2 Pod::link

B<Pod::link> is a class for manipulation of POD hyperlinks. Usage:

  my $link = Pod::link->new('alternative text|page/"section in page"');

The B<Pod::link> class is mainly designed to parse the contents of the
C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
different parts of a POD hyperlink for further processing. It can also be
used to construct hyperlinks.

=over 4

=item Pod::link-E<gt>new()

The B<new()> method can either be passed a set of key/value pairs or a single
scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
of the class C<Pod::link> is returned. The value C<undef> indicates a
failure, the error message is stored in C<$@>.

=cut

use Carp;

# TODO node/item as marked-up text
# ...but additional method node_text which is plain ISO-8859-1

sub initialize {
  my $self = shift;
  $self->{_line} ||= undef;
  $self->{_file} ||= undef;
  $self->{_page} ||= '';
  $self->{_node} ||= '';
  $self->{_alttext} ||= [];
  $self->{_type} ||= undef;
  if(defined $_[0]) {
    if(ref($_[0])) {
      # called with a list of parameters
      %$self = (%$self, %{$_[0]});
    }
  }
  $self;
}

=item $link-E<gt>text()

This method returns the textual representation of the hyperlink as above,
but without markers (read only). Depending on the link type this is one of
the following alternatives (the + and * denote the portions of the text
that are marked up):

  the +perl+ manpage
  the *$|* entry in the +perlvar+ manpage
  the section on *OPTIONS* in the +perldoc+ manpage
  the section on *DESCRIPTION* elsewhere in this document

=cut

# The complete link's text
sub as_text {
  my $self = shift;
  my @alttext = $self->alttext;
  if(@alttext) {
    return join('', map { $_->as_text } @alttext);
  }
  my $type = $self->{_type};
  my $section = $self->{_node};
  if($type eq 'url') {
    return $section;
  }
  my $page = $self->{_page};
  my $page_ext = '';
  $page =~ s/([(]\w*[)])$// && ($page_ext = $1);
  (!$section ? '' : $type eq 'item' ?
    "the $section entry" : "the section on $section" ) .
    ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
    ' elsewhere in this document');
}

=item $link-E<gt>markup($str_method,$page_cb,$node_cb)

Markup the link with appropriate strings. The textual objects within the
link are expanded with the I<$method> method.

=cut

sub markup {
  my ($self,$method,$page_cb,$node_cb) = @_;

  my @alttext = $self->alttext;
  my $node = $self->{_node};
  my $type = $self->{_type};
  my $page = $self->{_page};

  if(@alttext) {
    my ($obj,$callback);
    if($node) {
      $callback = $node_cb || $page_cb;
    } else {
      $callback = $page_cb || $node_cb;
    }
    return &$callback($self, @alttext);
  }

  if($type eq 'url') {
    my $callback = $node_cb || $page_cb;
    return &$callback($self, Pod::string->new($node));
  }

  my $page_ext = '';
  $page =~ s/([(]\w*[)])$// && ($page_ext = $1);

  unless($node) {
    # we must have a page
    my $pre = Pod::string->new("the ");
    my $post = Pod::string->new(" manpage");
    my $callback = $page_cb || $node_cb;
    return($pre->$method,&$callback($self, Pod::string->new($page)),$post->$method);
  } else {
    my @conv;
    if($type eq 'item') {
      @conv = ( (Pod::string->new("the "))->$method,
        &$node_cb($self,Pod::string->new($node)),
        (Pod::string->new(" entry"))->$method );
    } else {
      @conv = ( (Pod::string->new("the section on "))->$method,
        &$node_cb($self,Pod::string->new($node)));
    }
    if($page) {
      push(@conv, (Pod::string->new("the "))->$method,
        &$page_cb($self, Pod::string->new($page)),
        ($page_ext ? (Pod::string->new($page_ext))->$method : ()),
        (Pod::string->new(" manpage"))->$method); 
    } else {
      push(@conv, (Pod::string->new(" elsewhere in this document"))->$method);
    }
    return @conv;
  }
}

=item $link-E<gt>file()

=item $link-E<gt>line()

Just simple slots for storing information about the line and the file
the link was encountered in. Has to be filled in manually.

=cut

# The line in the file the link appears
sub line {
  return (@_ > 1) ? ($_[0]->{_line} = $_[1]) : $_[0]->{_line};
}

# The POD file name the link appears in
sub file {
  return (@_ > 1) ? ($_[0]->{_file} = $_[1]) : $_[0]->{_file};
}

=item $link-E<gt>page()

This method sets or returns the POD page this link points to.

=cut

# The POD page the link appears on
sub page {
  if (@_ > 1) {
    $_[0]->{_page} = $_[1];
  }
  $_[0]->{_page};
}

=item $link-E<gt>node()

As above, but the destination node text of the link.

=cut

# The link destination
sub node {
  if (@_ > 1) {
    $_[0]->{_node} = $_[1];
  }
  $_[0]->{_node};
}

=item $link-E<gt>alttext()

Sets or returns an alternative text specified in the link.

=cut

# Potential alternative text
sub alttext {
  my $self = shift;
  if (@_) {
    $self->{_alttext} = [ @_ ];
  }
  @{$self->{_alttext}};
}

=item $link-E<gt>type()

The node type, either C<section> or C<item>. As an unofficial type,
there is also C<url>, derived from e.g. C<LE<lt>http://perl.comE<gt>>

=cut

# The type: item or headn
sub type {
  return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}

=item $link-E<gt>as_pod()

Returns the link as contents of C<LE<lt>E<gt>>.

=back

=cut

# The link itself
sub as_pod {
  my $self = shift;
  my $link = $self->page() || '';
  my $node = $self->node();
  if($node) {
    $node =~ s/\|/E<verbar>/g;
    $node =~ s:/:E<sol>:g;
    if($self->type() eq 'section') {
      $link .= ($link ? '/' : '') . qq{"$node"};
    }
    elsif($self->type() eq 'url') {
      $link = $self->node(); # TODO to escape or not to escape?
    }
    else { # item
      $link .= '/' . $node;
    }
  }
  my @txt = $self->alttext();
  if(@txt) {
    my $text = join('', map { $_->as_pod } @txt);
    $text =~ s/\|/E<verbar>/g;
    $text =~ s:/:E<sol>:g;
    $link = "$text|$link";
  }
  "L<$link>";
}

##############################################################################

1;

Re: [p5pod] [ANNOUNCE] Pod::StdParser, Pod::Tests proof of concept

Reply via email to