stas 2002/06/15 11:20:33 Modified: lib/DocSet RunTime.pm lib/DocSet/Doc HTML2HTML.pm POD2HTML.pm lib/DocSet/Source HTML.pm Log: sync with DocSet - improve the parsing of the E<lt>headE<gt> and make base, meta and link elements available to the templates. [Per Einar Ellefsen <[EMAIL PROTECTED]>] - correct the mapping of config.cfg to the autogenerated index.html, and use path2uri to convert from a path to a URI. Also fix the stripping of the full base path on non-Unix systems by using abs2rel from File::Spec. Revision Changes Path 1.8 +13 -23 modperl-docs/lib/DocSet/RunTime.pm Index: RunTime.pm =================================================================== RCS file: /home/cvs/modperl-docs/lib/DocSet/RunTime.pm,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- RunTime.pm 11 Jun 2002 15:02:10 -0000 1.7 +++ RunTime.pm 15 Jun 2002 18:20:33 -0000 1.8 @@ -6,7 +6,7 @@ use strict; use warnings; -use File::Spec::Functions qw(catdir catfile splitdir); +use File::Spec::Functions qw(catdir catfile splitdir abs2rel); use File::Find; use DocSet::Util; @@ -84,7 +84,8 @@ @search_paths = @{$ra_search_paths || []}; - %exts = map {$_ => 1} @{$ra_search_exts || []}; + # .cfg is for matching config.cfg to become index.html + %exts = map {$_ => 1} @{$ra_search_exts || []}, 'cfg'; my @ext_accept_pattern = map {quotemeta($_)."\$"} keys %exts; my $rsub_keep_ext = @@ -92,34 +93,23 @@ my %seen; for my $rel_path (@search_paths) { - my $full_path = catdir $base, $rel_path; - die "$full_path is not a dir" unless -d $full_path; + my $full_base_path = catdir $base, $rel_path; + die "$full_base_path is not a dir" unless -d $full_base_path; my @seen_pattern = map {"^".quotemeta($_)} keys %seen; - my $rsub_skip_seen = - build_matchmany_sub(\@seen_pattern); + my $rsub_skip_seen = build_matchmany_sub(\@seen_pattern); - # rewrite non / paths to be / as in URI ($rel_path is no more - # needed to read from the real fs, will need this fixup for - # generating proper URIs. 
- $rel_path = join "/", splitdir $rel_path; - - my $full_path_regex = quotemeta $full_path; - $src_docs{$rel_path} = { - "index.html" => 1, # base index.html - map { m{(.*?/?)[^/]+$} # add autogenerated index.html - ? ("$1index.html" => 1, $_ => 1) - : ($_ => 1); # shouldn't happen, but just in case - } - map {join "/", splitdir $_} # rewrite non / paths to be URI's / - map {s|$full_path_regex/||; $_} # strip the leading path + my $rel_uri = path2uri($rel_path); + $src_docs{$rel_uri} = { + map { s/config\.cfg$/index.html/; ($_ => 1) } # autogenerated index.html + map path2uri( abs2rel($_, $full_base_path) ), # full path=>relative uri grep $rsub_keep_ext->($_), # get files with wanted exts grep !$rsub_skip_seen->($_), # skip seen base dirs - @{ expand_dir($full_path) } + @{ expand_dir($full_base_path) } }; - note "Scanning for src files: $full_path"; - $seen{$full_path}++; + note "Scanning for src files: $full_base_path"; + $seen{$full_base_path}++; } # dumper \%src_docs; 1.3 +1 -0 modperl-docs/lib/DocSet/Doc/HTML2HTML.pm Index: HTML2HTML.pm =================================================================== RCS file: /home/cvs/modperl-docs/lib/DocSet/Doc/HTML2HTML.pm,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- HTML2HTML.pm 5 Feb 2002 10:27:19 -0000 1.2 +++ HTML2HTML.pm 15 Jun 2002 18:20:33 -0000 1.3 @@ -20,6 +20,7 @@ my $vars = { meta => $self->{meta}, body => [EMAIL PROTECTED], + headers => $self->{parsed_tree}{head}, dir => $self->{dir}, nav => $self->{nav}, last_modified => $self->{timestamp}, 1.7 +1 -0 modperl-docs/lib/DocSet/Doc/POD2HTML.pm Index: POD2HTML.pm =================================================================== RCS file: /home/cvs/modperl-docs/lib/DocSet/Doc/POD2HTML.pm,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- POD2HTML.pm 13 Jun 2002 09:20:16 -0000 1.6 +++ POD2HTML.pm 15 Jun 2002 18:20:33 -0000 1.7 @@ -48,6 +48,7 @@ meta => $self->{meta}, toc => $self->{toc}, body => [EMAIL 
PROTECTED], + headers => {}, dir => $self->{dir}, nav => $self->{nav}, last_modified => $self->{timestamp}, 1.7 +62 -20 modperl-docs/lib/DocSet/Source/HTML.pm Index: HTML.pm =================================================================== RCS file: /home/cvs/modperl-docs/lib/DocSet/Source/HTML.pm,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- HTML.pm 14 Apr 2002 06:17:48 -0000 1.6 +++ HTML.pm 15 Jun 2002 18:20:33 -0000 1.7 @@ -53,10 +53,6 @@ } } -# currently retrieves these parts from the source HTML -# head.title -# head.meta.description -# body sub parse { my($self) = @_; @@ -110,11 +106,12 @@ accum_h($self, $text); } - my $p = HTML::Parser->new(api_version => 3, - start_h => [\&start_h, "self, tagname, attr, text"], - end_h => [\&end_h, "self, tagname"], - text_h => [\&text_h, "self, text"], - ); + my $p = HTML::Parser->new( + api_version => 3, + start_h => [\&start_h, "self, tagname, attr, text"], + end_h => [\&end_h, "self, tagname"], + text_h => [\&text_h, "self, text"], + ); # Parse document text chunk by chunk $p->parse(${ $self->{content} }); $p->eof; @@ -124,26 +121,70 @@ } { - # this one retrieves and stashes away the description (As 'abstract') - # and the body and the title of the given html + # this parsing extracts the following elements and makes them + # available to templates as: + # meta.title + # head.meta.* (+ renames: description -> abstract) + # head.base + # head.link + # body + + # init my $start_h = sub { - my($self, $tagname, $attr) = @_; - if ($tagname eq 'meta' && lc $attr->{name} eq 'description') { - $self->{parsed_tree}->{abstract} = $attr->{content}; + my($self, $tagname, $attr, $text) = @_; + my $meta = $self->{parsed_tree}{head}{meta}; + + # special treatment + if ($tagname eq 'meta' && exists $attr->{name} && + lc $attr->{name} eq 'description') { + $self->{parsed_tree}{abstract} = $attr->{content}; + } + elsif ($tagname eq 'meta' && exists $attr->{content}) { + # note: doesn't take into account the 
'scheme' attr, + # but that one isn't used much + if (exists $attr->{name}) { + $meta->{name}{ $attr->{name} } = $attr->{content}; + } + elsif (exists $attr->{'http-equiv'}) { + $meta->{'http-equiv'}{ $attr->{'http-equiv'} } + = $attr->{content}; + } + else { + # unsupported head element? + } + } + elsif ($tagname eq 'base') { + # there is usually only one <base> + $self->{parsed_tree}{head}{base} = $attr->{href} + if exists $attr->{href}; } + elsif ($tagname eq 'link') { + # link elements won't overlap, because each is + # additive -> easier to store text + $self->{parsed_tree}{head}{link} .= $text if length $text; + } + # note: if adding other elements that also appear outside <head>, + # you will need to check that you are inside <head> by setting + # a flag when entering it and unsetting it when exiting }; my $end_h = sub { my($self, $tagname, $skipped_text) = @_; # use $p itself as a tmp storage (ok according to the docs) + # <title> and <body> get special treatment + if ($tagname eq 'title' or $tagname eq 'body') { $self->{parsed_tree}->{$tagname} = $skipped_text; + } }; - my $p = HTML::Parser->new(api_version => 3, - report_tags => [qw(title meta body)], - start_h => [$start_h, "self, tagname, attr"], - end_h => [$end_h, "self, tagname, skipped_text"], - ); + my $p = HTML::Parser->new( + api_version => 3, + report_tags => [qw(title meta body base link)], + start_h => [$start_h, "self, tagname, attr, text"], + end_h => [$end_h, "self, tagname, skipped_text"], + ); + # init + $p->{parsed_tree}{head}{meta} = {}; # Parse document text chunk by chunk $p->parse(${ $self->{content} }); $p->eof; @@ -180,7 +221,8 @@ Retrieve and set the meta data that describes the input document into the I<meta> object attribute. The I<title> and I<link> meta attributes -are getting set. +are getting set. the rest of the E<lt>headE<gt> is made available for +the templates too. =back
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]