In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/2bdc80de969b38bd35347b37a3c6378bb5e97519?hp=42b68fb1f934ac50514f30d47e6528efa653e54f>

- Log -----------------------------------------------------------------
commit 2bdc80de969b38bd35347b37a3c6378bb5e97519
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 26 21:40:22 2012 -0600

    perlreguts: Fit long verbatim lines to 79 cols

M       pod/perlreguts.pod
M       t/porting/known_pod_issues.dat

commit 8ee2793fb909a6ffc6d6ad6631ed31f3931b77ed
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 27 10:12:41 2012 -0600

    mktables: Mention USourceData in generated pod
    
    These files were included by Unicode for the first time in the final
    version of its version 6.2.  They document proposals for encoding
    Han characters in Unicode.  As far as I can tell, they have no real use
    except to people working on such proposals.  They are considered part of
    the Unicode Character Database, however, and should be mentioned in
    perluniprops as data that Perl ignores from that database.

M       lib/unicore/mktables

commit caa7539541ed1f0d9fcf590a46c35f3255d8db61
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 27 10:12:06 2012 -0600

    mktables: Nits in comments, generated pod

M       lib/unicore/mktables
-----------------------------------------------------------------------

Summary of changes:
 lib/unicore/mktables           |    8 +++--
 pod/perlreguts.pod             |   74 +++++++++++++++++++++------------------
 t/porting/known_pod_issues.dat |    1 -
 3 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index e779b08..633686f 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -1021,7 +1021,7 @@ if ($v_version ge v4.1.0) {
     $why_suppressed{'Script=Katakana_Or_Hiragana'} = 'Obsolete.  All code points previously matched by this have been moved to "Script=Common".';
 }
 if ($v_version ge v6.0.0) {
-    $why_suppressed{'Script=Katakana_Or_Hiragana'} .= '  Consider instead using "Script_Extensions=Katakana" or "Script_Extensions=Hiragana (or both)"';
+    $why_suppressed{'Script=Katakana_Or_Hiragana'} .= '  Consider instead using "Script_Extensions=Katakana" or "Script_Extensions=Hiragana" (or both)';
     $why_suppressed{'Script_Extensions=Katakana_Or_Hiragana'} = 'All code points that would be matched by this are matched by either "Script_Extensions=Katakana" or "Script_Extensions=Hiragana"';
 }
 
@@ -1078,7 +1078,7 @@ END
 # The input files don't list every code point.  Those not listed are to be
 # defaulted to some value.  Below are hard-coded what those values are for
 # non-binary properties as of 5.1.  Starting in 5.0, there are
-# machine-parsable comment lines in the files the give the defaults; so this
+# machine-parsable comment lines in the files that give the defaults; so this
 # list shouldn't have to be extended.  The claim is that all missing entries
 # for binary properties will default to 'N'.  Unicode tried to change that in
 # 5.2, but the beta period produced enough protest that they backed off.
@@ -1149,6 +1149,8 @@ my %ignored_files = (
     'ReadMe.txt' => 'Documentation',
     'StandardizedVariants.txt' => 'Certain glyph variations for character 
display are standardized.  This lists the non-Unihan ones; the Unihan ones are 
also not used by Perl, and are in a separate U ... [48 chars truncated]
     'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
+    'USourceData.txt' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters',
+    'USourceData.pdf' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters',
     'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
     'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
     'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
@@ -15336,7 +15338,7 @@ the left brace completely changes the meaning of the construct, from "match"
 (for C<\\p{}>) to "doesn't match" (for C<\\P{}>).  Casing in this document is
 for improved legibility.
 
-Also, white space, hyphens, and underscores are also normally ignored
+Also, white space, hyphens, and underscores are normally ignored
 everywhere between the {braces}, and hence can be freely added or removed
 even if the C</x> modifier hasn't been specified on the regular expression.
 But $a_bold_stricter at the beginning of an entry in the table below
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index ec1c243..75dc6dd 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -182,9 +182,9 @@ POSIX char classes called C<regnode_charclass_class> which has an
 additional 4-byte (32-bit) bitmap indicating which POSIX char classes
 have been included.
 
-    regnode_charclass_class  U32 arg1;
-                             char bitmap[ANYOF_BITMAP_SIZE];
-                             char classflags[ANYOF_CLASSBITMAP_SIZE];
+   regnode_charclass_class  U32 arg1;
+                            char bitmap[ANYOF_BITMAP_SIZE];
+                            char classflags[ANYOF_CLASSBITMAP_SIZE];
 
 =back
 
@@ -354,20 +354,23 @@ simpler form.
 
 The call graph looks like this:
 
-    reg()                        # parse a top level regex, or inside of parens
-        regbranch()              # parse a single branch of an alternation
-            regpiece()           # parse a pattern followed by a quantifier
-                regatom()        # parse a simple pattern
-                    regclass()   #   used to handle a class
-                    reg()        #   used to handle a parenthesised subpattern
-                    ....
-            ...
-            regtail()            # finish off the branch
-        ...
-        regtail()                # finish off the branch sequence. Tie each
-                                 # branch's tail to the tail of the sequence
-                                 # (NEW) In Debug mode this is
-                                 # regtail_study().
+ reg()                        # parse a top level regex, or inside of
+                              # parens
+     regbranch()              # parse a single branch of an alternation
+         regpiece()           # parse a pattern followed by a quantifier
+             regatom()        # parse a simple pattern
+                 regclass()   #   used to handle a class
+                 reg()        #   used to handle a parenthesised
+                              #   subpattern
+                 ....
+         ...
+         regtail()            # finish off the branch
+     ...
+     regtail()                # finish off the branch sequence. Tie each
+                              # branch's tail to the tail of the
+                              # sequence
+                              # (NEW) In Debug mode this is
+                              # regtail_study().
 
 A grammar form might be something like this:
 
@@ -489,11 +492,11 @@ Now for something much more complex: C</x(?:foo*|b[a][rR])(foo|bar)$/>
                                       atom
  >)$<             34              tail~ BRANCH (28)
                   36              tsdy~ BRANCH (END) (31)
-                                      ~ attach to CLOSE1 (34) offset to 3
+                                     ~ attach to CLOSE1 (34) offset to 3
                                   tsdy~ EXACT <foo> (EXACT) (29)
-                                      ~ attach to CLOSE1 (34) offset to 5
+                                     ~ attach to CLOSE1 (34) offset to 5
                                   tsdy~ EXACT <bar> (EXACT) (32)
-                                      ~ attach to CLOSE1 (34) offset to 2
+                                     ~ attach to CLOSE1 (34) offset to 2
  >$<                        tail~ BRANCH (3)
                                 ~ BRANCH (9)
                                 ~ TAIL (25)
@@ -765,7 +768,7 @@ implement things such as the stringification of C<qr//>.
 The other structure is pointed to be the C<regexp> struct's
 C<pprivate> and is in addition to C<intflags> in the same struct
 considered to be the property of the regex engine which compiled the
-regular expression; 
+regular expression;
 
 The regexp structure contains all the data that perl needs to be aware of
 to properly work with the regular expression. It includes data about
@@ -792,19 +795,22 @@ The following structure is used as the C<pprivate> struct by perl's
 regex engine. Since it is specific to perl it is only of curiosity
 value to other engine implementations.
 
-    typedef struct regexp_internal {
-            regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */
-            U32 *offsets;           /* offset annotations 20001228 MJD 
-                                       data about mapping the program to the 
-                                       string*/
-            regnode *regstclass;    /* Optional startclass as identified or constructed
-                                       by the optimiser */
-            struct reg_data *data;  /* Additional miscellaneous data used by the program.
-                                       Used to make it easier to clone and free arbitrary
-                                       data that the regops need. Often the ARG field of
-                                       a regop is an index into this structure */
-            regnode program[1];     /* Unwarranted chumminess with compiler. */
-    } regexp_internal;
+ typedef struct regexp_internal {
+         regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */
+         U32 *offsets;           /* offset annotations 20001228 MJD
+                                  * data about mapping the program to
+                                  * the string*/
+         regnode *regstclass;    /* Optional startclass as identified or
+                                  * constructed by the optimiser */
+         struct reg_data *data;  /* Additional miscellaneous data used
+                                  * by the program.  Used to make it
+                                  * easier to clone and free arbitrary
+                                  * data that the regops need. Often the
+                                  * ARG field of a regop is an index
+                                  * into this structure */
+         regnode program[1];     /* Unwarranted chumminess with
+                                  * compiler. */
+ } regexp_internal;
 
 =over 5
 
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index f90ab1f..6274a8d 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -267,7 +267,6 @@ pod/perlpodspec.pod Verbatim line length including indents exceeds 79 by    9
 pod/perlpodstyle.pod   Verbatim line length including indents exceeds 79 by    1
 pod/perlrebackslash.pod        Verbatim line length including indents exceeds 79 by    1
 pod/perlref.pod        Verbatim line length including indents exceeds 79 by    1
-pod/perlreguts.pod     Verbatim line length including indents exceeds 79 by    17
 pod/perlrequick.pod    Verbatim line length including indents exceeds 79 by    3
 pod/perlretut.pod      Verbatim line length including indents exceeds 79 by    13
 pod/perlrun.pod        Verbatim line length including indents exceeds 79 by    2

--
Perl5 Master Repository

Reply via email to