[perl.git] branch blead updated. v5.31.0-71-g0478e945a3

Karl Williamson Mon, 27 May 2019 11:40:47 -0700

In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/0478e945a30d8c53408e52b0e5844af899471b96?hp=926760a67ed43045b7686a4475d0362da2bbcd8b>


- Log -----------------------------------------------------------------
commit 0478e945a30d8c53408e52b0e5844af899471b96
Author: Karl Williamson <[email protected]>
Date:   Fri May 24 22:12:56 2019 -0600

    Update Pod-Simple to CPAN version 3.36
    
    [DELTA]
    
    3.36
        Added Pod::Simple::JustPod to extract the pod lines from a file
        Improved detection of input encoding CP1252 vs UTF-8
        Fixed github issue #79, =cut event out of order
        Fixed github issue #85, verbatim_indent doesn't work on HTML
        Fixed github issue #89, css files refer to themselves
        Fixed github issue #92, broken RTF with Unicode inputs
        Extended RTF to handle Unicode code points above 0xFFFF
        Nested L<> is now flagged as an error
        Turned off negative repeat count does nothing warnings
        Fixed/improved some docs about this distribution

-----------------------------------------------------------------------

Summary of changes:
 MANIFEST                                           |   6 +
 Porting/Maintainers.pl                             |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple.pm                  |  33 +-
 cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm         | 699 +++++++++++++++------
 cpan/Pod-Simple/lib/Pod/Simple/Checker.pm          |   6 +-
 cpan/Pod-Simple/lib/Pod/Simple/Debug.pm            |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm       |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm        |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/HTML.pm             |   4 +-
 cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm        |  30 +-
 cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm          | 362 +++++++++++
 cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm      |   4 +-
 cpan/Pod-Simple/lib/Pod/Simple/Methody.pm          |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/Progress.pm         |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/PullParser.pm       |   2 +-
 .../lib/Pod/Simple/PullParserEndToken.pm           |   2 +-
 .../lib/Pod/Simple/PullParserStartToken.pm         |   2 +-
 .../lib/Pod/Simple/PullParserTextToken.pm          |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/PullParserToken.pm  |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/RTF.pm              | 179 +++---
 cpan/Pod-Simple/lib/Pod/Simple/Search.pm           |   4 +-
 cpan/Pod-Simple/lib/Pod/Simple/SimpleTree.pm       |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/Subclassing.pod     |  14 +-
 cpan/Pod-Simple/lib/Pod/Simple/Text.pm             |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/TextContent.pm      |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/TiedOutFH.pm        |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/Transcode.pm        |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/TranscodeDumb.pm    |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/TranscodeSmart.pm   |   2 +-
 cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm            |  10 +-
 cpan/Pod-Simple/lib/Pod/Simple/XMLOutStream.pm     |   2 +-
 cpan/Pod-Simple/t/00about.t                        |   2 +-
 cpan/Pod-Simple/t/JustPod01.t                      | 219 +++++++
 .../t/data/basic.pod => Pod-Simple/t/JustPod02.t}  | 109 +++-
 cpan/Pod-Simple/t/JustPod_corpus.t                 | 155 +++++
 cpan/Pod-Simple/t/corpus/polish_utf8.txt           |  19 +-
 cpan/Pod-Simple/t/corpus/polish_utf8.xml           |  37 +-
 cpan/Pod-Simple/t/encod04.t                        |  79 ++-
 cpan/Pod-Simple/t/fcodes_s.t                       |  36 +-
 cpan/Pod-Simple/t/github_issue_79.t                |  73 +++
 cpan/Pod-Simple/t/html01.t                         |  12 +-
 cpan/Pod-Simple/t/perlcyg.pod                      |   2 +-
 cpan/Pod-Simple/t/rtf_utf8.t                       | 220 +++++++
 cpan/Pod-Simple/t/search50.t                       |   1 +
 cpan/Pod-Simple/t/whine.t                          |  22 +-
 cpan/Pod-Simple/t/x_nixer.t                        |   2 +-
 cpan/Pod-Simple/t/xhtml01.t                        |  12 +-
 47 files changed, 1953 insertions(+), 436 deletions(-)
 create mode 100644 cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
 create mode 100644 cpan/Pod-Simple/t/JustPod01.t
 copy cpan/{podlators/t/data/basic.pod => Pod-Simple/t/JustPod02.t} (77%)
 create mode 100644 cpan/Pod-Simple/t/JustPod_corpus.t
 create mode 100644 cpan/Pod-Simple/t/github_issue_79.t
 create mode 100644 cpan/Pod-Simple/t/rtf_utf8.t

diff --git a/MANIFEST b/MANIFEST
index de891fc8ff..f21f0f5471 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1613,6 +1613,7 @@ cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm               
turn Pod into XML
 cpan/Pod-Simple/lib/Pod/Simple/HTML.pm                 convert Pod to HTML
 cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm            convert several Pod 
files to several HTML files
 cpan/Pod-Simple/lib/Pod/Simple/HTMLLegacy.pm           Pod::Simple::HTMLLegacy
+cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
 cpan/Pod-Simple/lib/Pod/Simple/LinkSection.pm          represent "section" 
attributes of L codes
 cpan/Pod-Simple/lib/Pod/Simple/Methody.pm              turn Pod::Simple events 
into method calls
 cpan/Pod-Simple/lib/Pod/Simple/Progress.pm             Pod::Simple::Progress
@@ -1731,6 +1732,7 @@ cpan/Pod-Simple/t/fcodes_l.t                              
Pod::Simple test file
 cpan/Pod-Simple/t/fcodes_s.t                           Pod::Simple test file
 cpan/Pod-Simple/t/for.t                                        Pod::Simple 
test file
 cpan/Pod-Simple/t/fornot.t                             Pod::Simple test file
+cpan/Pod-Simple/t/github_issue_79.t
 cpan/Pod-Simple/t/heads.t                              Pod::Simple test file
 cpan/Pod-Simple/t/html01.t                             Pod::Simple test file
 cpan/Pod-Simple/t/html02.t                             Pod::Simple test file
@@ -1743,6 +1745,9 @@ cpan/Pod-Simple/t/junk1.pod                               
Pod::Simple test file
 cpan/Pod-Simple/t/junk1o.txt                           Pod::Simple test file
 cpan/Pod-Simple/t/junk2.pod                            Pod::Simple test file
 cpan/Pod-Simple/t/junk2o.txt                           Pod::Simple test file
+cpan/Pod-Simple/t/JustPod01.t
+cpan/Pod-Simple/t/JustPod02.t
+cpan/Pod-Simple/t/JustPod_corpus.t
 cpan/Pod-Simple/t/linkclas.t                           Pod::Simple test file
 cpan/Pod-Simple/t/output.t                             Pod::Simple test file
 cpan/Pod-Simple/t/perlcyg.pod                          Pod::Simple test file
@@ -1755,6 +1760,7 @@ cpan/Pod-Simple/t/puller.t                                
Pod::Simple test file
 cpan/Pod-Simple/t/pulltitl.t                           Pod::Simple test file
 cpan/Pod-Simple/t/reinit.t                             Pod::Simple test file
 cpan/Pod-Simple/t/render.t                             Pod::Simple test file
+cpan/Pod-Simple/t/rtf_utf8.t
 cpan/Pod-Simple/t/search05.t                           Pod::Simple test file
 cpan/Pod-Simple/t/search10.t                           Pod::Simple test file
 cpan/Pod-Simple/t/search12.t                           Pod::Simple test file
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index 58169fabb4..a84fbf59f3 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -926,7 +926,7 @@ use File::Glob qw(:case);
     },
 
     'Pod::Simple' => {
-        'DISTRIBUTION' => 'KHW/Pod-Simple-3.35.tar.gz',
+        'DISTRIBUTION' => 'KHW/Pod-Simple-3.36.tar.gz',
         'FILES'        => q[cpan/Pod-Simple],
     },
 
diff --git a/cpan/Pod-Simple/lib/Pod/Simple.pm b/cpan/Pod-Simple/lib/Pod/Simple.pm
index 20924153b6..a9db8c2a68 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple.pm
@@ -18,7 +18,7 @@ use vars qw(
 );
 
 @ISA = ('Pod::Simple::BlackBox');
-$VERSION = '3.35';
+$VERSION = '3.36';
 
 @Known_formatting_codes = qw(I B C L E F S X Z); 
 %Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
@@ -74,6 +74,9 @@ else { # EBCDIC on early Perl.  We know what the values are for the code
 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 
 __PACKAGE__->_accessorize(
+  '_output_is_for_JustPod', # For use only by Pod::Simple::JustPod,
+                       # If non-zero, don't expand Z<> E<> S<> L<>,
+                       # and count how many brackets in format codes
   'nbsp_for_S',        # Whether to map S<...>'s to \xA0 characters
   'source_filename',   # Filename of the source, for use in warnings
   'source_dead',       # Whether to consider this parser's source dead
@@ -168,6 +171,7 @@ sub encoding {
 BEGIN {
   *pretty        = \&Pod::Simple::BlackBox::pretty;
   *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol;
+  *my_qr         = \&Pod::Simple::BlackBox::my_qr;
 }
 
 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -339,10 +343,9 @@ sub unaccept_targets {
 
 # XXX Probably it is an error that the digit '9' is excluded from these re's.
 # Broken for early Perls on EBCDIC
-my $xml_name_re = eval "qr/[^-.0-8:A-Z_a-z[:^ascii:]]/";
-if (! defined $xml_name_re) {
-    $xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/;
-}
+my $xml_name_re = my_qr('[^-.0-8:A-Z_a-z[:^ascii:]]', '9');
+$xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/
+                                                            unless $xml_name_re;
 
 sub accept_code { shift->accept_codes(@_) } # alias
 
@@ -652,12 +655,13 @@ sub _make_treelet {
     $treelet = $self->_treelet_from_formatting_codes(@_);
   }
   
-  if( $self->_remap_sequences($treelet) ) {
+  if( ! $self->{'_output_is_for_JustPod'}   # Retain these as-is for pod output
+     && $self->_remap_sequences($treelet) )
+  {
     $self->_treat_Zs($treelet);  # Might as well nix these first
     $self->_treat_Ls($treelet);  # L has to precede E and S
     $self->_treat_Es($treelet);
     $self->_treat_Ss($treelet);  # S has to come after E
-
     $self->_wrap_up($treelet); # Nix X's and merge texties
     
   } else {
@@ -1080,9 +1084,14 @@ sub _treat_Ls {  # Process our dear dear friends, the 
L<...> sequences
       
       # By here, $treelet->[$i] is definitely an L node
       my $ell = $treelet->[$i];
-      DEBUG > 1 and print STDERR "Ogling L node $ell\n";
+      DEBUG > 1 and print STDERR "Ogling L node " . pretty($ell) . "\n";
         
-      # bitch if it's empty
+      # bitch if it's empty or is just '/'
+      if (@{$ell} == 3 and $ell->[2] =~ m!\A\s*/\s*\z!) {
+        $self->whine( $start_line, "L<> contains only '/'" );
+        $treelet->[$i] = 'L</>';  # just make it a text node
+        next;  # and move on
+      }
       if(  @{$ell} == 2
        or (@{$ell} == 3 and $ell->[2] eq '')
       ) {
@@ -1289,6 +1298,7 @@ sub _treat_Ls {  # Process our dear dear friends, the 
L<...> sequences
         $section_name = [splice @ell_content];
         $section_name->[ 0] =~ s/^\"//s;
         $section_name->[-1] =~ s/\"$//s;
+        $ell->[1]{'~tolerated'} = 1;
       }
 
       # Turn L<Foo Bar> into L</Foo Bar>.
@@ -1296,8 +1306,8 @@ sub _treat_Ls {  # Process our dear dear friends, the 
L<...> sequences
          and grep !ref($_) && m/ /s, @ell_content
       ) {
         $section_name = [splice @ell_content];
+        $ell->[1]{'~deprecated'} = 1;
         # That's support for the now-deprecated syntax.
-        # (Maybe generate a warning eventually?)
         # Note that it deliberately won't work on L<...|Foo Bar>
       }
 
@@ -1347,7 +1357,7 @@ sub _treat_Ls {  # Process our dear dear friends, the 
L<...> sequences
       # And update children to be the link-text:
       @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : '');
       
-      DEBUG > 2 and print STDERR "End of L-parsing for this node 
$treelet->[$i]\n";
+      DEBUG > 2 and print STDERR "End of L-parsing for this node " . 
pretty($treelet->[$i]) . "\n";
 
       unshift @stack, $treelet->[$i]; # might as well recurse
     }
@@ -1507,6 +1517,7 @@ sub _accessorize {  # A simple-minded method-maker
       $Carp::CarpLevel = 1,  Carp::croak(
        "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
       ) unless (@_ == 1 or @_ == 2) and ref $_[0];
+
       (@_ == 1) ?  $_[0]->{$attrname}
                 : ($_[0]->{$attrname} = $_[1]);
     };
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
index 9fe3f702ef..7f30052b0d 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/BlackBox.pm
@@ -22,8 +22,36 @@ use integer; # vroom!
 use strict;
 use Carp ();
 use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
 #use constant DEBUG => 7;
+
+sub my_qr ($$) {
+
+    # The first argument is a pattern source string to compile and return.
+    # Older perls compile any syntactically valid property, even if it isn't
+    # legal.  To cope with this, return "" unless the compiled pattern also
+    # matches the second argument, a sample string the caller furnishes.
+
+    my ($input_re, $should_match) = @_;
+    # XXX could have a third parameter $shouldnt_match for extra safety
+
+    my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : "";  # NB: 'le' is a string compare
+
+    my $re = eval "no warnings; $use_utf8 qr/$input_re/";
+    #print STDERR  __LINE__, ": $input_re: $@\n" if $@;
+    return "" if $@;   # the pattern did not compile on this perl
+
+    my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/";
+    #print STDERR  __LINE__, ": $input_re: $@\n" if $@;
+    return "" if $@;   # the trial match itself died
+
+    #print STDERR  __LINE__, ": SUCCESS: $re\n" if $matches;
+    return $re if $matches;
+
+    #print STDERR  __LINE__, ": $re: didn't match\n";
+    return "";         # compiled, but failed to match the sample
+}
+
 BEGIN {
   require Pod::Simple;
   *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
@@ -32,8 +60,37 @@ BEGIN {
 # Matches a character iff the character will have a different meaning
 # if we choose CP1252 vs UTF-8 if there is no =encoding line.
 # This is broken for early Perls on non-ASCII platforms.
-my $non_ascii_re = eval "qr/[[:^ascii:]]/";
-$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re;
+my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6");
+$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re;
+
+# Use patterns understandable by Perl 5.6, if possible
+my $cs_re = my_qr('\p{IsCs}', "\x{D800}");
+my $cn_re = my_qr('\p{IsCn}', "\x{09E4}");  # <reserved> code point unlikely
+                                            # to get assigned
+my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]',
+                           "\x{250}");
+$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re;
+
+my $script_run_re = eval 'no warnings "experimental::script_run";
+                          qr/(*script_run: ^ .* $ )/x';
+my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}");
+unless ($latin_re) {
+    # This was machine generated to be the ranges of the union of the above
+    # three properties, with things that were undefined by Unicode 4.1 filling
+    # gaps.  That is the version in use when Perl advanced enough to
+    # successfully compile and execute the above pattern.
+    $latin_re = 
my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]',
 "\x{100}");
+}
+
+my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A");
+
+# Latin script code points not in the first release of Unicode
+my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}");
+
+# If this perl doesn't have the Deprecated property, there's only one code
+# point in it that we need be concerned with.
+my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}");
+$deprecated_re = qr/\x{149}/ unless $deprecated_re;
 
 my $utf8_bom;
 if (($] ge 5.007_003)) {
@@ -57,10 +114,10 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
   my $cut_handler  = $self->{'cut_handler'};
   my $wl_handler   = $self->{'whiteline_handler'};
   $self->{'line_count'} ||= 0;
- 
+
   my $scratch;
 
-  DEBUG > 4 and 
+  DEBUG > 4 and
    print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
 
   DEBUG > 5 and
@@ -71,9 +128,17 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
    # paragraph buffer.  Because we need to defer processing of =over
    # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
    # to process this.
-  
+
   $self->{'pod_para_count'} ||= 0;
 
+  # An attempt to match the pod portions of a line.  This is not foolproof,
+  # but is good enough to serve as part of the heuristic for guessing the pod
+  # encoding if not specified.
+  my $format_codes = join "", '[', grep { / ^ [A-Za-z] $/x }
+                                                keys %{$self->{accept_codes}};
+  $format_codes .= ']';
+  my $pod_chars_re = qr/ ^ = [A-Za-z]+ | $format_codes < /x;
+
   my $line;
   foreach my $source_line (@_) {
     if( $self->{'source_dead'} ) {
@@ -97,7 +162,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
       ($line = $source_line) =~ tr/\n\r//d;
        # If we don't have two vars, we'll end up with that there
        # tr/// modding the (potentially read-only) original source line!
-    
+
     } else {
       DEBUG > 2 and print STDERR "First line: [$source_line]\n";
 
@@ -106,7 +171,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
         $self->_handle_encoding_line( "=encoding utf8" );
         delete $self->{'_processed_encoding'};
         $line =~ tr/\n\r//d;
-        
+
       } elsif( $line =~ s/^\xFE\xFF//s ) {
         DEBUG and print STDERR "Big-endian UTF-16 BOM seen.  Aborting 
parsing.\n";
         $self->scream(
@@ -130,7 +195,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
         next;
 
         # TODO: implement somehow?
-        
+
       } else {
         DEBUG > 2 and print STDERR "First line is BOM-less.\n";
         ($line = $source_line) =~ tr/\n\r//d;
@@ -144,8 +209,8 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
 
       my $encoding;
 
-      # No =encoding line, and we are at the first line in the input that
-      # contains a non-ascii byte, that is one whose meaning varies depending
+      # No =encoding line, and we are at the first pod line in the input that
+      # contains a non-ascii byte, that is, one whose meaning varies depending
       # on whether the file is encoded in UTF-8 or CP1252, which are the two
       # possibilities permitted by the pod spec.  (ASCII is assumed if the
       # file only contains ASCII bytes.)  In order to process this line, we
@@ -162,22 +227,28 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
       # without conflict.  CP 1252 uses most of them for graphic characters.
       #
       # Note that all ASCII-range bytes represent their corresponding code
-      # points in CP1252 and UTF-8.  In ASCII platform UTF-8 all other code
-      # points require multiple (non-ASCII) bytes to represent.  (A separate
-      # paragraph for EBCDIC is below.)  The multi-byte representation is
-      # quite structured.  If we find an isolated byte that requires multiple
-      # bytes to represent in UTF-8, we know that the encoding is not UTF-8.
-      # If we find a sequence of bytes that violates the UTF-8 structure, we
-      # also can presume the encoding isn't UTF-8, and hence must be 1252.
+      # points in both CP1252 and UTF-8.  In ASCII platform UTF-8, all other
+      # code points require multiple (non-ASCII) bytes to represent.  (A
+      # separate paragraph for EBCDIC is below.)  The multi-byte
+      # representation is quite structured.  If we find an isolated byte that
+      # would require multiple bytes to represent in UTF-8, we know that the
+      # encoding is not UTF-8.  If we find a sequence of bytes that violates
+      # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and
+      # hence must be 1252.
       #
       # But there are ambiguous cases where we could guess wrong.  If so, the
       # user will end up having to supply an =encoding line.  We use all
       # readily available information to improve our chances of guessing
       # right.  The odds of something not being UTF-8, but still passing a
       # UTF-8 validity test go down very rapidly with increasing length of the
-      # sequence.  Therefore we look at all the maximal length non-ascii
-      # sequences on the line.  If any of the sequences can't be UTF-8, we
-      # quit there and choose CP1252.  If all could be UTF-8, we guess UTF-8.
+      # sequence.  Therefore we look at all non-ascii sequences on the line.
+      # If any of the sequences can't be UTF-8, we quit there and choose
+      # CP1252.  If all could be UTF-8, we see if any of the code points
+      # represented are unlikely to be in pod.  If so, we guess CP1252.  If
+      # not, we check if the line is all in the same script; if not guess
+      # CP1252; otherwise UTF-8.  For perls that don't have convenient script
+      # run testing, see if there is both Latin and non-Latin.  If so, CP1252,
+      # otherwise UTF-8.
       #
       # On EBCDIC platforms, the situation is somewhat different.  In
       # UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
@@ -188,51 +259,188 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
       # very unlikely to be in pod text.  So if we encounter one of them, it
       # means that it is quite likely CP1252 and not UTF-8.  The net result is
       # the same code below is used for both platforms.
-      while ($line =~ m/($non_ascii_re+)/g) {
-        my $non_ascii_seq = $1;
-
-        if (length $non_ascii_seq == 1) {
-          $encoding = 'CP1252';
-          goto guessed;
-        } elsif ($] ge 5.007_003) {
-
-          # On Perls that have this function, we can see if the sequence is
-          # valid UTF-8 or not.
-          my $is_utf8;
-          {
-            no warnings 'utf8';
-            $is_utf8 = utf8::decode($non_ascii_seq);
+      #
+      # XXX probably if the line has E<foo> that evaluates to illegal CP1252,
+      # then it is UTF-8.  But we haven't processed E<> yet.
+
+      goto set_1252 if $] lt 5.006_000;    # No UTF-8 on very early perls
+
+      my $copy;
+
+      no warnings 'utf8';
+
+      if ($] ge 5.007_003) {
+        $copy = $line;
+
+        # On perls that have this function, we can use it to easily see if the
+        # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag
+        # needed below for script run detection
+        goto set_1252 if ! utf8::decode($copy);
+      }
+      elsif (ord("A") != 65) {  # Early EBCDIC, assume UTF-8.  What's a windows
+                                # code page doing here anyway?
+        goto set_utf8;
+      }
+      else { # ASCII, no decode(): do it ourselves using the fundamental
+             # characteristics of UTF-8
+        use if $] le 5.006002, 'utf8';
+
+        my $char_ord;
+        my $needed;         # How many continuation bytes to gobble up
+
+        # Initialize the translated line with a dummy character that will be
+        # deleted after everything else is done.  This dummy makes sure that
+        # $copy will be in UTF-8.  Doing it now avoids the bugs in early perls
+        # with upgrading in the middle
+        $copy = chr(0x100);
+
+        # Parse through the line
+        for (my $i = 0; $i < length $line; $i++) {
+          my $byte = substr($line, $i, 1);
+
+          # ASCII bytes are trivially dealt with
+          if ($byte !~ $non_ascii_re) {
+            $copy .= $byte;
+            next;
+          }
+
+          my $b_ord = ord $byte;
+
+          # Now figure out what this code point would be if the input is
+          # actually in UTF-8.  If, in the process, we discover that it isn't
+          # well-formed UTF-8, we guess CP1252.
+          #
+          # Start the process.  If it is UTF-8, we are at the first, start
+          # byte, of a multi-byte sequence.  We look at this byte to figure
+          # out how many continuation bytes are needed, and to initialize the
+          # code point accumulator with the data from this byte.
+          #
+          # Normally the minimum continuation byte is 0x80, but in certain
+          # instances the minimum is a higher number.  So the code below
+          # overrides this for those instances.
+          my $min_cont = 0x80;
+
+          if ($b_ord < 0xC2) { #  A start byte < C2 is malformed
+            goto set_1252;
+          }
+          elsif ($b_ord <= 0xDF) {
+            $needed = 1;
+            $char_ord = $b_ord & 0x1F;
+          }
+          elsif ($b_ord <= 0xEF) {
+            $min_cont = 0xA0 if $b_ord == 0xE0;
+            $needed = 2;
+            $char_ord = $b_ord & (0x1F >> 1);
           }
-          if (! $is_utf8) {
-            $encoding = 'CP1252';
-            goto guessed;
+          elsif ($b_ord <= 0xF4) {
+            $min_cont = 0x90 if $b_ord == 0xF0;
+            $needed = 3;
+            $char_ord = $b_ord & (0x1F >> 2);
           }
-        } elsif (ord("A") == 65) {  # An early Perl, ASCII platform
-
-          # Without utf8::decode, it's a lot harder to do a rigorous check
-          # (though some early releases had a different function that
-          # accomplished the same thing).  Since these are ancient Perls, not
-          # likely to be in use today, we take the easy way out, and look at
-          # just the first two bytes of the sequence to see if they are the
-          # start of a UTF-8 character.  In ASCII UTF-8, continuation bytes
-          # must be between 0x80 and 0xBF.  Start bytes can range from 0xC2
-          # through 0xFF, but anything above 0xF4 is not Unicode, and hence
-          # extremely unlikely to be in a pod.
-          if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) {
-            $encoding = 'CP1252';
-            goto guessed;
+          else { # F4 is the highest start byte for legal Unicode; higher is
+                 # unlikely to be in pod.
+            goto set_1252;
           }
 
-          # We don't bother doing anything special for EBCDIC on early Perls.
-          # If there is a solitary variant, CP1252 will be chosen; otherwise
-          # UTF-8.
-        }
-      } # End of loop through all variant sequences on the line
+          # ? not enough continuation bytes available
+          goto set_1252 if $i + $needed >= length $line;
+
+          # Accumulate the ordinal of the character from the remaining
+          # (continuation) bytes.
+          while ($needed-- > 0) {
+            my $cont = substr($line, ++$i, 1);
+            $b_ord = ord $cont;
+            goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF;
+
+            # In all cases, any next continuation bytes all have the same
+            # minimum legal value
+            $min_cont = 0x80;
+
+            # Accumulate this byte's contribution to the code point
+            $char_ord <<= 6;
+            $char_ord |= ($b_ord & 0x3F);
+          }
+
+          # Here, the sequence that formed this code point was valid UTF-8,
+          # so add the completed character to the output
+          $copy .= chr $char_ord;
+        } # End of loop through line
+
+        # Delete the dummy first character
+        $copy = substr($copy, 1);
+      }
+
+      # Here, $copy is legal UTF-8.
+
+      # If it can't be legal CP1252, no need to look further.  (These bytes
+      # aren't valid in CP1252.)  This test could have been placed higher in
+      # the code, but it seemed wrong to set the encoding to UTF-8 without
+      # making sure that the very first instance is well-formed.  But what if
+      # it isn't legal CP1252 either?  We have to choose one or the other, and
+      # it seems safer to favor the single-byte encoding over the multi-byte.
+      goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/;
+
+      # The C1 controls are not likely to appear in pod
+      goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/;
+
+      # Nor are surrogates nor unassigned, nor deprecated.
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ 
$cs_re;
+      goto set_1252 if $cs_re && $copy =~ $cs_re;
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re 
&& $copy =~ $cn_re;
+      goto set_1252 if $cn_re && $copy =~ $cn_re;
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ 
$deprecated_re;
+      goto set_1252 if $copy =~ $deprecated_re;
+
+      # Nor are rare code points.  But this is hard to determine.  khw
+      # believes that IPA characters and the modifier letters are unlikely to
+      # be in pod (and certainly very unlikely to be in the first line in
+      # the pod containing non-ASCII)
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ 
$rare_blocks_re;
+      goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re;
+
+      # The first Unicode version included essentially every Latin character
+      # in modern usage.  So a Latin character not in the first release is
+      # unlikely to be in pod.
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if 
$later_latin_re && $copy =~ $later_latin_re;
+      goto set_1252 if $later_latin_re && $copy =~ $later_latin_re;
+
+      # On perls that handle script runs, if the UTF-8 interpretation yields
+      # a single script, we guess UTF-8, otherwise just having a mixture of
+      # scripts is suspicious, so guess CP1252.  We first strip off, as best
+      # we can, the ASCII characters that look like they are pod directives,
+      # as these would always show as mixed with non-Latin text.
+      $copy =~ s/$pod_chars_re//g;
+
+      if ($script_run_re) {
+        goto set_utf8 if $copy =~ $script_run_re;
+        DEBUG > 8 and print STDERR __LINE__, ":  not script run\n";
+        goto set_1252;
+      }
 
-      # All sequences in the line could be UTF-8.  Guess that.
+      # Even without script runs, but on recent enough perls and Unicodes, we
+      # can check if there is a mixture of both Latin and non-Latin.  Again,
+      # having a mixture of scripts is suspicious, so assume CP1252
+
+      # If it's all non-Latin, there is no CP1252, as that is Latin
+      # characters and punct, etc.
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ 
$latin_re;
+      goto set_utf8 if $copy !~ $latin_re;
+
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ 
$every_char_is_latin_re;
+      goto set_utf8 if $copy =~ $every_char_is_latin_re;
+
+      DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n";
+
+     set_1252:
+      DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n";
+      $encoding = 'CP1252';
+      goto done_set;
+
+     set_utf8:
+      DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n";
       $encoding = 'UTF-8';
 
-    guessed:
+     done_set:
       $self->_handle_encoding_line( "=encoding $encoding" );
       delete $self->{'_processed_encoding'};
       $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
@@ -254,13 +462,13 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
             $self->{'line_count'},
             "=cut found outside a pod block.  Skipping to next block."
           );
-          
+
           ## Before there were errata sections in the world, it was
           ## least-pessimal to abort processing the file.  But now we can
           ## just barrel on thru (but still not start a pod block).
           #splice @_;
           #push @_, undef;
-          
+
           next;
         } else {
           $self->{'in_pod'} = $self->{'start_of_pod_block'}
@@ -273,7 +481,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
          if $code_handler;
         # Note: this may cause code to be processed out of order relative
         #  to pods, but in order relative to cuts.
-        
+
         # Note also that we haven't yet applied the transcoding to $line
         #  by time we call $code_handler!
 
@@ -284,11 +492,11 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
           DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
           $self->{'line_count'} = $1 - 1;
         }
-        
+
         next;
       }
     }
-    
+
     # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
     # Else we're in pod mode:
 
@@ -308,12 +516,13 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
       # ++$self->{'pod_para_count'};
       $self->_ponder_paragraph_buffer();
        # by now it's safe to consider the previous paragraph as done.
+      DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n";
       $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
        if $cut_handler;
 
       # TODO: add to docs: Note: this may cause cuts to be processed out
       #  of order relative to pods, but in order relative to code.
-      
+
     } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
       if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
         $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
@@ -324,29 +533,30 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
         DEBUG > 1 and print STDERR "Saving blank line at line 
${$self}{'line_count'}\n";
         push @{$paras->[-1]}, $line;
       }  # otherwise it's not interesting
-      
+
       if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
         DEBUG > 1 and print STDERR "Noting para ends with blank line at 
${$self}{'line_count'}\n";
       }
-      
+
       $self->{'last_was_blank'} = 1;
-      
+
     } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...
-      
-      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
+
+      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) {
         # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
-        my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
+        my $new = [$1, {'start_line' => $self->{'line_count'}}, $3];
+        $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " ";
          # Note that in "=head1 foo", the WS is lost.
          # Example: ['=head1', {'start_line' => 123}, ' foo']
-        
+
         ++$self->{'pod_para_count'};
-        
+
         $self->_ponder_paragraph_buffer();
          # by now it's safe to consider the previous paragraph as done.
-                
+
         push @$paras, $new; # the new incipient paragraph
         DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line 
${$self}{'line_count'}\n";
-        
+
       } elsif($line =~ m/^\s/s) {
 
         if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq 
'~Verbatim') {
@@ -379,7 +589,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
       }
       $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
     }
-    
+
   } # ends the big while loop
 
   DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
@@ -390,7 +600,7 @@ sub parse_lines {             # Usage: 
$parser->parse_lines(@lines)
 
 sub _handle_encoding_line {
   my($self, $line) = @_;
-  
+
   return if $self->parse_characters;
 
   # The point of this routine is to set $self->{'_transcoder'} as indicated.
@@ -492,7 +702,7 @@ sub _handle_encoding_line {
 
 sub _handle_encoding_second_level {
   # By time this is called, the encoding (if well formed) will already
-  #  have been acted one.
+  #  have been acted on.
   my($self, $para) = @_;
   my @x = @$para;
   my $content = join ' ', splice @x, 2;
@@ -500,7 +710,7 @@ sub _handle_encoding_second_level {
   $content =~ s/\s+$//s;
 
   DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";
-  
+
   if (defined($self->{'_processed_encoding'})) {
     #if($content ne $self->{'_processed_encoding'}) {
     #  Could it happen?
@@ -518,14 +728,14 @@ sub _handle_encoding_second_level {
     } else {
       DEBUG > 2 and print STDERR " (Yup, it was successfully handled 
already.)\n";
     }
-    
+
   } else {
     # Otherwise it's a syntax error
     $self->whine( $para->[1]{'start_line'},
       "Invalid =encoding syntax: $content"
     );
   }
-  
+
   return;
 }
 
@@ -542,7 +752,7 @@ sub _gen_errata {
   return() unless $self->{'errata'} and keys %{$self->{'errata'}};
 
   my @out;
-  
+
   foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
     push @out,
       ['=item', {'start_line' => $m}, "Around line $line:"],
@@ -555,7 +765,7 @@ sub _gen_errata {
       )
     ;
   }
-  
+
   # TODO: report of unknown entities? unrenderable characters?
 
   unshift @out,
@@ -569,7 +779,7 @@ sub _gen_errata {
     ['=over',  {'start_line' => $m, 'errata' => 1}, ''],
   ;
 
-  push @out, 
+  push @out,
     ['=back',  {'start_line' => $m, 'errata' => 1}, ''],
   ;
 
@@ -610,7 +820,7 @@ sub _ponder_paragraph_buffer {
   #                   Document,
   #                   Data, Para, Verbatim
   #                   B, C, longdirname (TODO -- wha?), etc. for all directives
-  # 
+  #
 
   my $self = $_[0];
   my $paras;
@@ -624,11 +834,11 @@ sub _ponder_paragraph_buffer {
   # We have something in our buffer.  So apparently the document has started.
   unless($self->{'doc_has_started'}) {
     $self->{'doc_has_started'} = 1;
-    
+
     my $starting_contentless;
     $starting_contentless =
      (
-       !@$curr_open  
+       !@$curr_open
        and @$paras and ! grep $_->[0] ne '~end', @$paras
         # i.e., if the paras is all ~ends
      )
@@ -637,7 +847,7 @@ sub _ponder_paragraph_buffer {
       $starting_contentless ? 'contentless' : 'contentful',
       " document\n"
     ;
-    
+
     $self->_handle_element_start(
       ($scratch = 'Document'),
       {
@@ -649,15 +859,28 @@ sub _ponder_paragraph_buffer {
 
   my($para, $para_type);
   while(@$paras) {
-    last if @$paras == 1 and
-      ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
-        or $paras->[0][0] eq '=item' )
-    ;
+    last if      @$paras == 1
+            and (    $paras->[0][0] eq '=over'
+                 or  $paras->[0][0] eq '=item'
+                 or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'}));
     # Those're the three kinds of paragraphs that require lookahead.
     #   Actually, an "=item Foo" inside an <over type=text> region
     #   and any =item inside an <over type=block> region (rare)
     #   don't require any lookahead, but all others (bullets
     #   and numbers) do.
+    # The verbatim is different from the other two, because those might be
+    # like:
+    #
+    #   =item
+    #   ...
+    #   =cut
+    #   ...
+    #   =item
+    #
+    # The =cut here finishes the paragraph but doesn't terminate the =over
+    # they should be in. (khw apologizes that he didn't comment at the time
+    # why the 'in_pod' works, and no longer remembers why, and doesn't think
+    # it is currently worth the effort to re-figure it out.)
 
 # TODO: whinge about many kinds of directives in non-resolving =for regions?
 # TODO: many?  like what?  =head1 etc?
@@ -667,7 +890,7 @@ sub _ponder_paragraph_buffer {
 
     DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the 
stack: (",
       $self->_dump_curr_open(), ")\n";
-    
+
     if($para_type eq '=for') {
       next if $self->_ponder_for($para,$curr_open,$paras);
 
@@ -704,7 +927,7 @@ sub _ponder_paragraph_buffer {
     } else {
 
       # All non-magical codes!!!
-      
+
       # Here we start using $para_type for our own twisted purposes, to
       #  mean how it should get treated, not as what the element name
       #  should be.
@@ -744,10 +967,10 @@ sub _ponder_paragraph_buffer {
           ;
           next;
         }
-        
-        
+
+
         my $over_type = $over->[1]{'~type'};
-        
+
         if(!$over_type) {
           # Shouldn't happen1
           die "Typeless over in stack, starting at line "
@@ -772,7 +995,7 @@ sub _ponder_paragraph_buffer {
           my $item_type = $self->_get_item_type($para);
             # That kills the content of the item if it's a number or bullet.
           DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-          
+
           if($item_type eq 'text') {
             # Nothing special needs doing for 'text'
           } elsif($item_type eq 'number' or $item_type eq 'bullet') {
@@ -788,16 +1011,16 @@ sub _ponder_paragraph_buffer {
           } else {
             die "Unhandled item type $item_type"; # should never happen
           }
-          
+
           # =item-text thingies don't need any assimilation, it seems.
 
         } elsif($over_type eq 'number') {
           my $item_type = $self->_get_item_type($para);
             # That kills the content of the item if it's a number or bullet.
           DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-          
+
           my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
-          
+
           if($item_type eq 'bullet') {
             # Hm, it's not numeric.  Correct for this.
             $para->[1]{'number'} = $expected_value;
@@ -822,7 +1045,7 @@ sub _ponder_paragraph_buffer {
 
           } elsif($expected_value == $para->[1]{'number'}) {
             DEBUG > 1 and print STDERR " Numeric item has the expected value 
of $expected_value\n";
-            
+
           } else {
             DEBUG > 1 and print STDERR " Numeric item has ", 
$para->[1]{'number'},
              " instead of the expected value of $expected_value\n";
@@ -833,7 +1056,7 @@ sub _ponder_paragraph_buffer {
             );
             $para->[1]{'number'} = $expected_value;  # correcting!!
           }
-            
+
           if(@$para == 2) {
             # For the cases where we /didn't/ push to @$para
             if($paras->[0][0] eq '~Para') {
@@ -850,13 +1073,13 @@ sub _ponder_paragraph_buffer {
           my $item_type = $self->_get_item_type($para);
             # That kills the content of the item if it's a number or bullet.
           DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-          
+
           if($item_type eq 'bullet') {
             # as expected!
 
             if( $para->[1]{'~_freaky_para_hack'} ) {
               DEBUG and print STDERR "Accomodating '=item * Foo' tolerance 
hack.\n";
-              push @$para, delete $para->[1]{'~_freaky_para_hack'};
+              push @$para, $para->[1]{'~_freaky_para_hack'};
             }
 
           } elsif($item_type eq 'number') {
@@ -944,15 +1167,15 @@ sub _ponder_paragraph_buffer {
           my @fors = grep $_->[0] eq '=for', @$curr_open;
           DEBUG > 1 and print STDERR "Containing fors: ",
             join(',', map $_->[1]{'target'}, @fors), "\n";
-          
+
           if(! @fors) {
             DEBUG and print STDERR "Treating $para_type paragraph as such 
because stack has no =for's\n";
-            
+
           #} elsif(grep $_->[1]{'~resolve'}, @fors) {
           #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
           } elsif( $fors[-1][1]{'~resolve'} ) {
             # Look to the immediately containing for
-          
+
             if($para_type eq 'Data') {
               DEBUG and print STDERR "Treating Data paragraph as 
Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a 
resolver\n";
               $para->[0] = 'Para';
@@ -971,7 +1194,7 @@ sub _ponder_paragraph_buffer {
       if($para_type eq 'Plain') {
         $self->_ponder_Plain($para);
       } elsif($para_type eq 'Verbatim') {
-        $self->_ponder_Verbatim($para);        
+        $self->_ponder_Verbatim($para);
       } elsif($para_type eq 'Data') {
         $self->_ponder_Data($para);
       } else {
@@ -989,7 +1212,7 @@ sub _ponder_paragraph_buffer {
       $self->_traverse_treelet_bit(@$para);
     }
   }
-  
+
   return;
 }
 
@@ -1024,9 +1247,9 @@ sub _ponder_for {
   }
   DEBUG > 1 and
    print STDERR "Faking out a =for $target as a =begin $target / =end 
$target\n";
-  
+
   $para->[0] = 'Data';
-  
+
   unshift @$paras,
     ['=begin',
       {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
@@ -1038,7 +1261,7 @@ sub _ponder_for {
       $target,
     ],
   ;
-  
+
   return 1;
 }
 
@@ -1055,20 +1278,20 @@ sub _ponder_begin {
     DEBUG and print STDERR "Ignoring targetless =begin\n";
     return 1;
   }
-  
+
   my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
   $para->[1]{'title'} = $title if ($title);
   $para->[1]{'target'} = $target;  # without any ':'
   $content = $target; # strip off the title
-  
+
   $content =~ s/^:!/!:/s;
   my $neg;  # whether this is a negation-match
   $neg = 1        if $content =~ s/^!//s;
   my $to_resolve;  # whether to process formatting codes
   $to_resolve = 1 if $content =~ s/^://s;
-  
+
   my $dont_ignore; # whether this target matches us
-  
+
   foreach my $target_name (
     split(',', $content, -1),
     $neg ? () : '*'
@@ -1076,7 +1299,7 @@ sub _ponder_begin {
     DEBUG > 2 and
      print STDERR " Considering whether =begin $content matches 
$target_name\n";
     next unless $self->{'accept_targets'}{$target_name};
-    
+
     DEBUG > 2 and
      print STDERR "  It DOES match the acceptable target $target_name!\n";
     $to_resolve = 1
@@ -1139,7 +1362,7 @@ sub _ponder_end {
     DEBUG and print STDERR "Ignoring targetless =end\n";
     return 1;
   }
-  
+
   unless($content =~ m/^\S+$/) {  # i.e., unless it's one word
     $self->whine(
       $para->[1]{'start_line'},
@@ -1149,7 +1372,7 @@ sub _ponder_end {
     DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
     return 1;
   }
-  
+
   unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
     $self->whine(
       $para->[1]{'start_line'},
@@ -1159,11 +1382,11 @@ sub _ponder_end {
     DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
     return 1;
   }
-  
+
   unless($content eq $curr_open->[-1][1]{'target'}) {
     $self->whine(
       $para->[1]{'start_line'},
-      "=end $content doesn't match =begin " 
+      "=end $content doesn't match =begin "
       . $curr_open->[-1][1]{'target'}
       . ".  (Stack: "
       . $self->_dump_curr_open() . ')'
@@ -1180,7 +1403,7 @@ sub _ponder_end {
   } else {
     $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
       # what's that for?
-    
+
     $self->{'content_seen'} ||= 1;
     $self->_handle_element_end( my $scratch = 'for', $para->[1]);
   }
@@ -1188,14 +1411,14 @@ sub _ponder_end {
   pop @$curr_open;
 
   return 1;
-} 
+}
 
 sub _ponder_doc_end {
   my ($self,$para,$curr_open,$paras) = @_;
   if(@$curr_open) { # Deal with things left open
     DEBUG and print STDERR "Stack is nonempty at end-document: (",
       $self->_dump_curr_open(), ")\n";
-      
+
     DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";
     unshift @$paras, $self->_closers_for_all_curr_open;
     # Make sure there is exactly one ~end in the parastack, at the end:
@@ -1205,11 +1428,11 @@ sub _ponder_doc_end {
      #  generate errata, and then another to be at the end
      #  when that loop back around to process the errata.
     return 1;
-    
+
   } else {
     DEBUG and print STDERR "Okay, stack is empty now.\n";
   }
-  
+
   # Try generating errata section, if applicable
   unless($self->{'~tried_gen_errata'}) {
     $self->{'~tried_gen_errata'} = 1;
@@ -1220,7 +1443,7 @@ sub _ponder_doc_end {
       return 1;  # I.e., loop around again to process these fake-o paragraphs
     }
   }
-  
+
   splice @$paras; # Well, that's that for this paragraph buffer.
   DEBUG and print STDERR "Throwing end-document event.\n";
 
@@ -1278,8 +1501,9 @@ sub _ponder_over {
   $para->[1]{'~type'} = $list_type;
   push @$curr_open, $para;
    # yes, we reuse the paragraph as a stack item
-  
+
   my $content = join ' ', splice @$para, 2;
+  $para->[1]{'~orig_content'} = $content;
   my $overness;
   if($content =~ m/^\s*$/s) {
     $para->[1]{'indent'} = 4;
@@ -1301,13 +1525,13 @@ sub _ponder_over {
     $para->[1]{'indent'} = 4;
   }
   DEBUG > 1 and print STDERR "=over found of type $list_type\n";
-  
+
   $self->{'content_seen'} ||= 1;
   $self->_handle_element_start((my $scratch = 'over-' . $list_type), 
$para->[1]);
 
   return;
 }
-      
+
 sub _ponder_back {
   my ($self,$para,$curr_open,$paras) = @_;
   # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
@@ -1354,10 +1578,10 @@ sub _ponder_item {
     ;
     return 1;
   }
-  
-  
+
+
   my $over_type = $over->[1]{'~type'};
-  
+
   if(!$over_type) {
     # Shouldn't happen1
     die "Typeless over in stack, starting at line "
@@ -1382,7 +1606,7 @@ sub _ponder_item {
     my $item_type = $self->_get_item_type($para);
       # That kills the content of the item if it's a number or bullet.
     DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-    
+
     if($item_type eq 'text') {
       # Nothing special needs doing for 'text'
     } elsif($item_type eq 'number' or $item_type eq 'bullet') {
@@ -1398,16 +1622,16 @@ sub _ponder_item {
     } else {
       die "Unhandled item type $item_type"; # should never happen
     }
-    
+
     # =item-text thingies don't need any assimilation, it seems.
 
   } elsif($over_type eq 'number') {
     my $item_type = $self->_get_item_type($para);
       # That kills the content of the item if it's a number or bullet.
     DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-    
+
     my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
-    
+
     if($item_type eq 'bullet') {
       # Hm, it's not numeric.  Correct for this.
       $para->[1]{'number'} = $expected_value;
@@ -1432,7 +1656,7 @@ sub _ponder_item {
 
     } elsif($expected_value == $para->[1]{'number'}) {
       DEBUG > 1 and print STDERR " Numeric item has the expected value of 
$expected_value\n";
-      
+
     } else {
       DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
        " instead of the expected value of $expected_value\n";
@@ -1443,7 +1667,7 @@ sub _ponder_item {
       );
       $para->[1]{'number'} = $expected_value;  # correcting!!
     }
-      
+
     if(@$para == 2) {
       # For the cases where we /didn't/ push to @$para
       if($paras->[0][0] eq '~Para') {
@@ -1460,13 +1684,13 @@ sub _ponder_item {
     my $item_type = $self->_get_item_type($para);
       # That kills the content of the item if it's a number or bullet.
     DEBUG and print STDERR " Item is of type ", $para->[0], " under 
$over_type\n";
-    
+
     if($item_type eq 'bullet') {
       # as expected!
 
       if( $para->[1]{'~_freaky_para_hack'} ) {
         DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
-        push @$para, delete $para->[1]{'~_freaky_para_hack'};
+        push @$para, $para->[1]{'~_freaky_para_hack'};
       }
 
     } elsif($item_type eq 'number') {
@@ -1533,30 +1757,36 @@ sub _ponder_Verbatim {
 
   $para->[1]{'xml:space'} = 'preserve';
 
-  my $indent = $self->strip_verbatim_indent;
-  if ($indent && ref $indent eq 'CODE') {
-      my @shifted = (shift @{$para}, shift @{$para});
-      $indent = $indent->($para);
-      unshift @{$para}, @shifted;
-  }
+  unless ($self->{'_output_is_for_JustPod'}) {
+    my $indent = $self->strip_verbatim_indent;
+    if ($indent && ref $indent eq 'CODE') {
+        my @shifted = (shift @{$para}, shift @{$para});
+        $indent = $indent->($para);
+        unshift @{$para}, @shifted;
+    }
+
+    for(my $i = 2; $i < @$para; $i++) {
+      foreach my $line ($para->[$i]) { # just for aliasing
+        # Strip indentation.
+        $line =~ s/^\Q$indent// if $indent;
 
-  for(my $i = 2; $i < @$para; $i++) {
-    foreach my $line ($para->[$i]) { # just for aliasing
-      # Strip indentation.
-      $line =~ s/^\Q$indent// if $indent
-          && !($self->{accept_codes} && 
$self->{accept_codes}{VerbatimFormatted});
-      while( $line =~
-        # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
-        # tabs are at every EIGHTH column.  For portability, it has to be
-        # one setting everywhere, and 8th wins.
-        s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
-      ) {}
+            # This is commented out because of github issue #85, and the
+            # current maintainers don't know why it was there in the first
+            # place.
+            #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
+        while( $line =~
+          # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
+          # tabs are at every EIGHTH column.  For portability, it has to be
+          # one setting everywhere, and 8th wins.
+          s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
+        ) {}
 
-      # TODO: whinge about (or otherwise treat) unindented or overlong lines
+        # TODO: whinge about (or otherwise treat) unindented or overlong lines
 
+      }
     }
   }
-  
+
   # Now the VerbatimFormatted hoodoo...
   if( $self->{'accept_codes'} and
       $self->{'accept_codes'}{'VerbatimFormatted'}
@@ -1596,7 +1826,7 @@ sub _traverse_treelet_bit {  # for use only by the 
routine above
 
   my $scratch;
   $self->_handle_element_start(($scratch=$name), shift @_);
-  
+
   while (@_) {
     my $x = shift;
     if (ref($x)) {
@@ -1606,7 +1836,7 @@ sub _traverse_treelet_bit {  # for use only by the 
routine above
       $self->_handle_text($x);
     }
   }
-  
+
   $self->_handle_element_end($scratch=$name);
   return;
 }
@@ -1651,7 +1881,7 @@ sub _closers_for_all_curr_open {
 
 sub _verbatim_format {
   my($it, $p) = @_;
-  
+
   my $formatting;
 
   for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
@@ -1659,7 +1889,7 @@ sub _verbatim_format {
     $p->[$i] .= "\n";
      # Unlike with simple Verbatim blocks, we don't end up just doing
      # a join("\n", ...) on the contents, so we have to append a
-     # newline to ever line, and then nix the last one later.
+     # newline to every line, and then nix the last one later.
   }
 
   if( DEBUG > 4 ) {
@@ -1672,7 +1902,7 @@ sub _verbatim_format {
 
   for(my $i = $#$p; $i > 2; $i--) {
     # work backwards over the lines, except the first (#2)
-    
+
     #next unless $p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s
     #        and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
      # look at a formatty line preceding a nonformatty one
@@ -1680,7 +1910,7 @@ sub _verbatim_format {
     if($p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s) {
       DEBUG > 5 and print STDERR "  It's a formatty line.  ",
        "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
-      
+
       if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
         DEBUG > 5 and print STDERR "  Previous line is formatty!  Skipping 
this one.\n";
         next;
@@ -1696,11 +1926,11 @@ sub _verbatim_format {
     # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
     # Example:
     #   What do you want?  i like pie. [or whatever]
-    # #:^^^^^^^^^^^^^^^^^              /////////////         
-    
+    # #:^^^^^^^^^^^^^^^^^              /////////////
+
 
     DEBUG > 4 and print STDERR "_verbatim_format 
considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
-    
+
     $formatting = '  ' . $1;
     $formatting =~ s/\s+$//s; # nix trailing whitespace
     unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
@@ -1716,7 +1946,7 @@ sub _verbatim_format {
     }
     # Make $formatting and the previous line be exactly the same length,
     # with $formatting having a " " as the last character.
- 
+
     DEBUG > 4 and print STDERR "Formatting <$formatting>    on <", $p->[$i-1], 
">\n";
 
 
@@ -1741,10 +1971,10 @@ sub _verbatim_format {
         #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
       }
     }
-    my @nixed =    
+    my @nixed =
       splice @$p, $i-1, 2, @new_line; # replace myself and the next line
     DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";
-    
+
     DEBUG > 6 and print STDERR "New version of the above line is these tokens 
(",
       scalar(@new_line), "):",
       map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
@@ -1791,29 +2021,46 @@ sub _treelet_from_formatting_codes {
   #            [ 'B', {}, "pie" ],
   #            "!"
   #       ]
-  
+  # This illustrates the general format of a treelet.  It is an array:
+  #     [0]       is a scalar indicating its type.  In the example above, the
+  #               types are '~Top' and 'B'
+  #     [1]       is a hash of various flags about it, possibly empty
+  #     [2] - [N] are an ordered list of the subcomponents of the treelet.
+  #               Scalars are literal text, refs are sub-treelets, to
+  #               arbitrary levels.  Stringifying a treelet will recursively
+  #               stringify the sub-treelets, concatenating everything
+  #               together to form the exact text of the treelet.
+
   my($self, $para, $start_line, $preserve_space) = @_;
-  
+
   my $treelet = ['~Top', {'start_line' => $start_line},];
-  
+
   unless ($preserve_space || $self->{'preserve_whitespace'}) {
     $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
     $para =~ s/ $//;
     $para =~ s/^ //;
   }
-  
+
   # Only apparent problem the above code is that N<<  >> turns into
   # N<< >>.  But then, word wrapping does that too!  So don't do that!
-  
+
+
+  # As a Start-code is encountered, the number of opening bracket '<'
+  # characters minus 1 is pushed onto @stack (so 0 means a single bracket,
+  # etc).  When closing brackets are found in the text, at least this number
+  # (plus the 1) will be required to mean the Start-code is terminated.  When
+  # those are found, @stack is popped.
   my @stack;
+
   my @lineage = ($treelet);
   my $raw = ''; # raw content of L<> fcode before splitting/processing
     # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
-    # into just 1 ' '. Is this the regex's doing or 'raw's?
+    # into just 1 ' '. Is this the regex's doing or 'raw's?  Answer is it's
+    # the 'collapse and trim all whitespace first' lines just above.
   my $inL = 0;
 
   DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
- 
+
   # Here begins our frightening tokenizer RE.  The following regex matches
   # text in four main parts:
   #
@@ -1846,7 +2093,11 @@ sub _treelet_from_formatting_codes {
         |
         # Match multiple-bracket end codes.  $3 gets the whitespace that
         # should be discarded before an end bracket but kept in other cases
-        # and $4 gets the end brackets themselves.
+        # and $4 gets the end brackets themselves.  ($3 can be empty if the
+        # construct is empty, like C<<  >>, and all the white-space has been
+        # gobbled up already, considered to be space after the opening
+        # bracket.  In this case we use look-behind to verify that there are
+        # at least 2 spaces in a row before the ">".)
         (\s+|(?<=\s\s))(>{2,})
         |
         (\s?>)          # $5: simple end-codes
@@ -1872,23 +2123,48 @@ sub _treelet_from_formatting_codes {
   ) {
     DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
     if(defined $1) {
+      my $bracket_count;    # How many '<<<' in a row this has.  Needed for
+                            # Pod::Simple::JustPod
       if(defined $2) {
         DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
-        push @stack, length($2) + 1; 
-          # length of the necessary complex end-code string
+        $bracket_count = length($2) + 1;
+        push @stack, $bracket_count; # length of the necessary complex
+                                     # end-code string
       } else {
         DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
         push @stack, 0;  # signal that we're looking for simple
+        $bracket_count = 1;
       }
-      push @lineage, [ substr($1,0,1), {}, ];  # new node object
-      push @{ $lineage[-2] }, $lineage[-1];
-      if ('L' eq substr($1,0,1)) {
-        $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
-        $inL = 1;
+      my $code = substr($1,0,1);
+      if ('L' eq $code) {
+        if ($inL) {
+            $raw .= $1;
+            $self->scream( $start_line,
+                           'Nested L<> are illegal.  Pretending inner one is '
+                         . 'X<...> so can continue looking for other errors.');
+            $code = "X";
+        }
+        else {
+            $raw = ""; # reset raw content accumulator
+            $inL = @stack;
+        }
       } else {
         $raw .= $1 if $inL;
       }
-
+      push @lineage, [ $code, {}, ];  # new node object
+
+      # Tell Pod::Simple::JustPod how many brackets there were, but to save
+      # space, not in the most usual case of there was just 1.  It can be
+      # inferred by the absence of this element.  Similarly, if there is more
+      # than one bracket, extract the white space between the final bracket
+      # and the real beginning of the interior.  Save that if it isn't just a
+      # single space
+      if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) {
+        $lineage[-1][1]{'~bracket_count'} = $bracket_count;
+        my $lspacer = substr($1, 1 + $bracket_count);
+        $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " ";
+      }
+      push @{ $lineage[-2] }, $lineage[-1];
     } elsif(defined $4) {
       DEBUG > 3 and print STDERR "Found apparent complex end-text code 
\"$3$4\"\n";
       # This is where it gets messy...
@@ -1917,20 +2193,35 @@ sub _treelet_from_formatting_codes {
       }
       #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
 
+      if ($3 ne " " && $self->{'_output_is_for_JustPod'}) {
+        if ($3 ne "") {
+          $lineage[-1][1]{'~rspacer'} = $3;
+        }
+        elsif ($lineage[-1][1]{'~lspacer'} eq "  ") {
+
+          # Here we had something like C<<  >> which was a false positive
+          delete $lineage[-1][1]{'~lspacer'};
+        }
+        else {
+          $lineage[-1][1]{'~rspacer'}
+                                = substr($lineage[-1][1]{'~lspacer'}, -1, 1);
+          chop $lineage[-1][1]{'~lspacer'};
+        }
+      }
+
       push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
       # Keep the element from being childless
-      
-      pop @stack;
-      pop @lineage;
 
-      unless (@stack) { # not in an L if there are no open fcodes
+      if ($inL == @stack) {
+        $lineage[-1][1]{'raw'} = $raw;
         $inL = 0;
-        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
-          $lineage[-1][-1][1]{'raw'} = $raw
-        }
       }
+
+      pop @stack;
+      pop @lineage;
+
       $raw .= $3.$4 if $inL;
-      
+
     } elsif(defined $5) {
       DEBUG > 3 and print STDERR "Found apparent simple end-text code 
\"$5\"\n";
 
@@ -1944,6 +2235,11 @@ sub _treelet_from_formatting_codes {
           push @{ $lineage[-1] }, ''; # keep it from being really childless
         }
 
+        if ($inL == @stack) {
+          $lineage[-1][1]{'raw'} = $raw;
+          $inL = 0;
+        }
+
         pop @stack;
         pop @lineage;
       } else {
@@ -1951,12 +2247,6 @@ sub _treelet_from_formatting_codes {
         push @{ $lineage[-1] }, $5;
       }
 
-      unless (@stack) { # not in an L if there are no open fcodes
-        $inL = 0;
-        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
-          $lineage[-1][-1][1]{'raw'} = $raw
-        }
-      }
       $raw .= $5 if $inL;
 
     } elsif(defined $6) {
@@ -1965,6 +2255,7 @@ sub _treelet_from_formatting_codes {
       $raw .= $6 if $inL;
         # XXX does not capture multiplace whitespaces -- 'raw' ends up with
         #     at most 1 leading/trailing whitespace, why not all of it?
+        #     Answer, because we deliberately trimmed it above
 
     } else {
       # should never ever ever ever happen
@@ -2095,7 +2386,7 @@ sub pretty { # adopted from Class::Classless
         # letters, but I don't know if it has always worked without bugs. It
         # seemed safest just to list the characters.
         # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
-        s<([^ 
!#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
+        s<([^ 
!"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
          <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
          #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
       qq{"$_"};
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
index 83415f8e25..b30dd66296 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Checker.pm
@@ -9,7 +9,7 @@ use Carp ();
 use Pod::Simple::Methody ();
 use Pod::Simple ();
 use vars qw( @ISA $VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
 @ISA = ('Pod::Simple::Methody');
 BEGIN { *DEBUG = defined(&Pod::Simple::DEBUG)
           ? \&Pod::Simple::DEBUG
@@ -88,8 +88,10 @@ sub end_item_text   { $_[0]->emit_par(-2) }
 sub emit_par {
   return unless $_[0]{'Errata_seen'};
   my($self, $tweak_indent) = splice(@_,0,2);
-  my $indent = ' ' x ( 2 * $self->{'Indent'} + ($tweak_indent||0) );
+  my $length = 2 * $self->{'Indent'} + ($tweak_indent||0);
+  my $indent = ' ' x ($length > 0 ? $length : 0);
    # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0
+   # (but it warns 'Negative repeat count does nothing' since 5.22)
 
   $self->{'Thispara'} =~ s/$Pod::Simple::shy//g;
   my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n");
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
index 428cc72359..aa714db47a 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/Debug.pm
@@ -2,7 +2,7 @@ require 5;
 package Pod::Simple::Debug;
 use strict;
 use vars qw($VERSION );
-$VERSION = '3.35';
+$VERSION = '3.36';
 
 sub import {
   my($value,$variable);
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
index 71bef5070b..2de11f19fb 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsText.pm
@@ -1,7 +1,7 @@
 
 require 5;
 package Pod::Simple::DumpAsText;
-$VERSION = '3.35';
+$VERSION = '3.36';
 use Pod::Simple ();
 BEGIN {@ISA = ('Pod::Simple')}
 
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
index 9d84878cb7..b68597fb68 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/DumpAsXML.pm
@@ -1,7 +1,7 @@
 
 require 5;
 package Pod::Simple::DumpAsXML;
-$VERSION = '3.35';
+$VERSION = '3.36';
 use Pod::Simple ();
 BEGIN {@ISA = ('Pod::Simple')}
 
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
index 9cdbed217e..977e92ff32 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm
@@ -9,7 +9,7 @@ use vars qw(
   $Doctype_decl  $Content_decl
 );
 @ISA = ('Pod::Simple::PullParser');
-$VERSION = '3.35';
+$VERSION = '3.36';
 BEGIN {
   if(defined &DEBUG) { } # no-op
   elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
@@ -29,7 +29,7 @@ $LamePad = '' unless defined $LamePad;
 
 $Linearization_Limit = 120 unless defined $Linearization_Limit;
  # headings/items longer than that won't get an <a name="...">
-$Perldoc_URL_Prefix  = 'http://search.cpan.org/perldoc?'
+$Perldoc_URL_Prefix  = 'https://metacpan.org/pod/'
  unless defined $Perldoc_URL_Prefix;
 $Perldoc_URL_Postfix = ''
  unless defined $Perldoc_URL_Postfix;
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
index 661266d0de..58cd1ee9a8 100644
--- a/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
+++ b/cpan/Pod-Simple/lib/Pod/Simple/HTMLBatch.pm
@@ -5,7 +5,7 @@ use strict;
 use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION
  $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA
 );
-$VERSION = '3.35';
+$VERSION = '3.36';
 @ISA = ();  # Yup, we're NOT a subclass of Pod::Simple::HTML!
 
 # TODO: nocontents stylesheets. Strike some of the color variations?
@@ -720,22 +720,21 @@ sub _gen_css_wad {
   }
 
   # Now a few indexless variations:
-  foreach my $variation (
-      'blkbluw', # black_with_blue_on_white
-      'whtpurk', # white_with_purple_on_black
-      'whtgrng', # white_with_green_on_grey
-      'grygrnw', # grey_with_green_on_white
-  ) {
-    my $outname = $variation;
+  for (my ($outfile, $variation) = each %{{
+      blkbluw => 'black_with_blue_on_white',
+      whtpurk => 'white_with_purple_on_black',
+      whtgrng => 'white_with_green_on_grey',
+      grygrnw => 'grey_with_green_on_white',
+  }}) {
     my $this_css = join "\n",
-      "/* This file is autogenerated.  Do not edit.  $outname */\n",
+      "/* This file is autogenerated.  Do not edit.  $outfile */\n",
       "\@import url(\"./_$variation.css\");",
       ".indexgroup { display: none; }",
       "\n",
     ;
-    my $name = $outname;    
+    my $name = $outfile;
     $name =~ tr/-_/  /;
-    $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css);
+    $self->add_css( "_$outfile.css", 0, $name, 0, 0, \$this_css);
   }
 
   return;
@@ -1110,12 +1109,15 @@ Example:
 
 =item $batchconv = Pod::Simple::HTMLBatch->new;
 
-This TODO
-
+This creates a new batch converter.  The method doesn't take parameters.
+To change the converter's attributes, use the L</"ACCESSOR METHODS">
+below.
 
 =item $batchconv->batch_convert( I<indirs>, I<outdir> );
 
-this TODO
+This searches the directories given in I<indirs> and writes
+HTML files for each of these to a corresponding directory
+in I<outdir>.  The directory I<outdir> must exist.
 
 =item $batchconv->batch_convert( undef    , ...);
 
diff --git a/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm 
b/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
new file mode 100644
index 0000000000..c7ad3d6977
--- /dev/null
+++ b/cpan/Pod-Simple/lib/Pod/Simple/JustPod.pm
@@ -0,0 +1,362 @@
+use 5;
+package Pod::Simple::JustPod;
+# ABSTRACT: Pod::Simple formatter that extracts POD from a file containing
+#           other things as well
+use strict;
+use warnings;
+
+use Pod::Simple::Methody ();
+our @ISA = ('Pod::Simple::Methody');
+
+sub new {
+  my $self = shift;
+  my $new  = $self->SUPER::new(@_);
+
+  $new->accept_targets('*');
+  $new->keep_encoding_directive(1);
+  $new->preserve_whitespace(1);
+  $new->complain_stderr(1);
+  $new->_output_is_for_JustPod(1);
+
+  return $new;
+}
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+sub check_that_all_is_closed {
+
+  # Actually checks that the things we depend on being balanced in fact are,
+  # so that we can continue in spite of pod errors
+
+  my $self = shift;
+  while ($self->{inL}) {
+    $self->end_L(@_);
+  }
+  while ($self->{fcode_end} && @{$self->{fcode_end}}) {
+    $self->_end_fcode(@_);
+  }
+}
+
+sub handle_text {
+
+  # Add text to the output buffer.  This is skipped if within an L<>, as we use
+  # the 'raw' attribute of that tag instead.
+
+  $_[0]{buffer} .= $_[1] unless $_[0]{inL} ;
+}
+
+sub spacer {
+
+  # Prints the white space following things like =head1.  This is normally a
+  # blank, unless BlackBox has told us otherwise.
+
+  my ($self, $arg) = @_;
+  return unless $arg;
+
+  my $spacer = ($arg->{'~orig_spacer'})
+                ? $arg->{'~orig_spacer'}
+                : " ";
+  $self->handle_text($spacer);
+}
+
+sub _generic_start {
+
+  # Called from tags like =head1, etc.
+
+  my ($self, $text, $arg) = @_;
+  $self->check_that_all_is_closed();
+  $self->handle_text($text);
+  $self->spacer($arg);
+}
+
+sub start_Document    { shift->_generic_start("=pod\n\n"); }
+sub start_head1       { shift->_generic_start('=head1', @_); }
+sub start_head2       { shift->_generic_start('=head2', @_); }
+sub start_head3       { shift->_generic_start('=head3', @_); }
+sub start_head4       { shift->_generic_start('=head4', @_); }
+sub start_encoding    { shift->_generic_start('=encoding', @_); }
+# sub start_Para
+# sub start_Verbatim
+
+sub start_item_bullet { # Handle =item *
+  my ($self, $arg) = @_;
+  $self->check_that_all_is_closed();
+  $self->handle_text('=item');
+
+  # It can be that they said simply '=item', and it is inferred that it is to
+  # be a bullet.
+  if (! $arg->{'~orig_content'}) {
+    $self->handle_text("\n\n");
+  }
+  else {
+    $self->spacer($arg);
+    if ($arg->{'~_freaky_para_hack'}) {
+
+        # See Message Id <[email protected]>
+        my $item_text = $arg->{'~orig_content'};
+        my $trailing = quotemeta $arg->{'~_freaky_para_hack'};
+        $item_text =~ s/$trailing$//;
+        $self->handle_text($item_text);
+    }
+    else {
+        $self->handle_text("*\n\n");
+    }
+  }
+}
+
+sub start_item_number {     # Handle '=item 2'
+  my ($self, $arg) = @_;
+  $self->check_that_all_is_closed();
+  $self->handle_text("=item");
+  $self->spacer($arg);
+  $self->handle_text("$arg->{'~orig_content'}\n\n");
+}
+
+sub start_item_text {   # Handle '=item foo bar baz'
+  my ($self, $arg) = @_;
+  $self->check_that_all_is_closed();
+  $self->handle_text('=item');
+  $self->spacer($arg);
+}
+
+sub _end_item {
+  my $self = shift;
+  $self->check_that_all_is_closed();
+  $self->emit;
+}
+
+*end_item_bullet = *_end_item;
+*end_item_number = *_end_item;
+*end_item_text   = *_end_item;
+
+sub _start_over  {  # Handle =over
+  my ($self, $arg) = @_;
+  $self->check_that_all_is_closed();
+  $self->handle_text("=over");
+
+  # The =over amount is optional
+  if ($arg->{'~orig_content'}) {
+    $self->spacer($arg);
+    $self->handle_text("$arg->{'~orig_content'}");
+  }
+  $self->handle_text("\n\n");
+}
+
+*start_over_bullet = *_start_over;
+*start_over_number = *_start_over;
+*start_over_text   = *_start_over;
+*start_over_block  = *_start_over;
+
+sub _end_over  {
+  my $self = shift;
+  $self->check_that_all_is_closed();
+  $self->handle_text('=back');
+  $self->emit;
+}
+
+*end_over_bullet = *_end_over;
+*end_over_number = *_end_over;
+*end_over_text   = *_end_over;
+*end_over_block  = *_end_over;
+
+sub end_Document    {
+  my $self = shift;
+  $self->emit;        # Make sure buffer gets flushed
+  print {$self->{'output_fh'} } "=cut\n"
+}
+
+sub _end_generic  {
+  my $self = shift;
+  $self->check_that_all_is_closed();
+  $self->emit;
+}
+
+*end_head1    = *_end_generic;
+*end_head2    = *_end_generic;
+*end_head3    = *_end_generic;
+*end_head4    = *_end_generic;
+*end_encoding = *_end_generic;
+*end_Para     = *_end_generic;
+*end_Verbatim = *_end_generic;
+
+sub _start_fcode {
+  my ($type, $self, $flags) = @_;
+
+  # How many brackets is set by BlackBox unless the count is 1
+  my $bracket_count = (exists $flags->{'~bracket_count'})
+                       ? $flags->{'~bracket_count'}
+                       : 1;
+  $self->handle_text($type . ( "<" x $bracket_count));
+
+  my $rspacer = "";
+  if ($bracket_count > 1) {
+    my $lspacer = (exists $flags->{'~lspacer'})
+                  ? $flags->{'~lspacer'}
+                  : " ";
+    $self->handle_text($lspacer);
+
+    $rspacer = (exists $flags->{'~rspacer'})
+                  ? $flags->{'~rspacer'}
+                  : " ";
+  }
+
+  # BlackBox doesn't output things for the ending code callbacks, so save
+  # what we need.
+  push @{$self->{'fcode_end'}}, [ $bracket_count, $rspacer ];
+}
+
+sub start_B { _start_fcode('B', @_); }
+sub start_C { _start_fcode('C', @_); }
+sub start_E { _start_fcode('E', @_); }
+sub start_F { _start_fcode('F', @_); }
+sub start_I { _start_fcode('I', @_); }
+sub start_S { _start_fcode('S', @_); }
+sub start_X { _start_fcode('X', @_); }
... 2185 lines suppressed ...

-- 
Perl5 Master Repository

[perl.git] branch blead updated. v5.31.0-71-g0478e945a3

Reply via email to