[perl.git] branch blead updated. v5.29.8-24-g966b4e4752

Karl Williamson Mon, 04 Mar 2019 10:11:46 -0800

In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/966b4e4752e107a969ce19fbdbdb819547d41137?hp=de59f38ed9c2eb88d97eed5f7ade475479bc3248>


- Log -----------------------------------------------------------------
commit 966b4e4752e107a969ce19fbdbdb819547d41137
Author: Karl Williamson <[email protected]>
Date:   Mon Feb 18 17:57:11 2019 -0700

    perlop: Improve documentation for (mostly) tr///
    
    This adds examples and clarifications

commit 0a142f463c08e1bf0466cee9a0f896e3d11e7dbf
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 4 10:30:38 2019 -0700

    ebcdic_tables.h: Remove alien '#'
    
    These were introduced in c05125c57fd7868af65366bacb6fe40c04b1c719 in
    July 2018, and would cause any EBCDIC compilations to fail.
    
    That I found it by code inspection shows that we've lost all our EBCDIC
    smokers again.

commit 635ff1f923d510fc8356bff054b96cbd97d93bf3
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 4 10:20:35 2019 -0700

    regen/ebcdic.pl: Move code to function
    
    This is for eventual use in being called from more than one place.

-----------------------------------------------------------------------

Summary of changes:
 ebcdic_tables.h |  96 +++++++++++++++++-----------------
 pod/perlop.pod  | 159 ++++++++++++++++++++++++++++++++++++--------------------
 regen/ebcdic.pl |  26 ++++++---
 3 files changed, 170 insertions(+), 111 deletions(-)

diff --git a/ebcdic_tables.h b/ebcdic_tables.h
index 103e10ef90..99f533bae7 100644
--- a/ebcdic_tables.h
+++ b/ebcdic_tables.h
@@ -44,9 +44,9 @@ SOFTWARE.
 
 /* Index is ASCII platform code point; value is EBCDIC 1047 equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_a2e[];
+    EXTCONST U8 PL_a2e[];
 #  else
-#    EXTCONST U8 PL_a2e[] = {
+    EXTCONST U8 PL_a2e[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F,
@@ -70,9 +70,9 @@ SOFTWARE.
 
 /* Index is EBCDIC 1047 code point; value is ASCII platform equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_e2a[];
+    EXTCONST U8 PL_e2a[];
 #  else
-#    EXTCONST U8 PL_e2a[] = {
+    EXTCONST U8 PL_e2a[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
@@ -97,9 +97,9 @@ SOFTWARE.
 /* (Confusingly named) Index is EBCDIC 1047 I8 byte; value is
  * EBCDIC 1047 UTF-EBCDIC equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_utf2e[];
+    EXTCONST U8 PL_utf2e[];
 #  else
-#    EXTCONST U8 PL_utf2e[] = {
+    EXTCONST U8 PL_utf2e[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F,
@@ -124,9 +124,9 @@ SOFTWARE.
 /* (Confusingly named) Index is EBCDIC 1047 UTF-EBCDIC byte; value is
  * EBCDIC 1047 I8 equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_e2utf[];
+    EXTCONST U8 PL_e2utf[];
 #  else
-#    EXTCONST U8 PL_e2utf[] = {
+    EXTCONST U8 PL_e2utf[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
@@ -153,9 +153,9 @@ SOFTWARE.
  * flags table in tr16.  The entries marked 9 in tr16 are continuation bytes
  * and are marked as length 1 here so that we can recover. */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_utf8skip[];
+    EXTCONST U8 PL_utf8skip[];
 #  else
-#    EXTCONST U8 PL_utf8skip[] = {
+    EXTCONST U8 PL_utf8skip[] = {
 /*     _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_*/  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
 /*1_*/  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
@@ -179,9 +179,9 @@ SOFTWARE.
 
 /* Index is EBCDIC 1047 code point; value is its lowercase equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_latin1_lc[];
+    EXTCONST U8 PL_latin1_lc[];
 #  else
-#    EXTCONST U8 PL_latin1_lc[] = {
+    EXTCONST U8 PL_latin1_lc[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -207,9 +207,9 @@ SOFTWARE.
  * The 'mod' in the name means that codepoints whose uppercase is above 255 or
  * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_mod_latin1_uc[];
+    EXTCONST U8 PL_mod_latin1_uc[];
 #  else
-#    EXTCONST U8 PL_mod_latin1_uc[] = {
+    EXTCONST U8 PL_mod_latin1_uc[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -234,9 +234,9 @@ SOFTWARE.
 /* Index is EBCDIC 1047 code point; For A-Z, value is a-z; for a-z, value
  * is A-Z; all other code points map to themselves */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_fold[];
+    EXTCONST U8 PL_fold[];
 #  else
-#    EXTCONST U8 PL_fold[] = {
+    EXTCONST U8 PL_fold[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -262,9 +262,9 @@ SOFTWARE.
  * (A => a; a => A, etc) in the 0-255 range.  If no such equivalent, value is
  * the code point itself */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_fold_latin1[];
+    EXTCONST U8 PL_fold_latin1[];
 #  else
-#    EXTCONST U8 PL_fold_latin1[] = {
+    EXTCONST U8 PL_fold_latin1[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -293,9 +293,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U8 PL_extended_utf8_dfa_tab[];
+    EXTCONST U8 PL_extended_utf8_dfa_tab[];
 #  else
-#    EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
+    EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
 /*         _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_    */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_    */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -334,9 +334,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U16 PL_strict_utf8_dfa_tab[];
+    EXTCONST U16 PL_strict_utf8_dfa_tab[];
 #  else
-#    EXTCONST U16 PL_strict_utf8_dfa_tab[] = {
+    EXTCONST U16 PL_strict_utf8_dfa_tab[] = {
 /*          _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_     */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_     */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -380,9 +380,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U8 PL_c9_utf8_dfa_tab[];
+    EXTCONST U8 PL_c9_utf8_dfa_tab[];
 #  else
-#    EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
+    EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
 /*        _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_   */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_   */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -421,9 +421,9 @@ SOFTWARE.
 
 /* Index is ASCII platform code point; value is EBCDIC 037 equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_a2e[];
+    EXTCONST U8 PL_a2e[];
 #  else
-#    EXTCONST U8 PL_a2e[] = {
+    EXTCONST U8 PL_a2e[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F,
@@ -447,9 +447,9 @@ SOFTWARE.
 
 /* Index is EBCDIC 037 code point; value is ASCII platform equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_e2a[];
+    EXTCONST U8 PL_e2a[];
 #  else
-#    EXTCONST U8 PL_e2a[] = {
+    EXTCONST U8 PL_e2a[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
@@ -474,9 +474,9 @@ SOFTWARE.
 /* (Confusingly named) Index is EBCDIC 037 I8 byte; value is
  * EBCDIC 037 UTF-EBCDIC equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_utf2e[];
+    EXTCONST U8 PL_utf2e[];
 #  else
-#    EXTCONST U8 PL_utf2e[] = {
+    EXTCONST U8 PL_utf2e[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F,
@@ -501,9 +501,9 @@ SOFTWARE.
 /* (Confusingly named) Index is EBCDIC 037 UTF-EBCDIC byte; value is
  * EBCDIC 037 I8 equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_e2utf[];
+    EXTCONST U8 PL_e2utf[];
 #  else
-#    EXTCONST U8 PL_e2utf[] = {
+    EXTCONST U8 PL_e2utf[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
@@ -530,9 +530,9 @@ SOFTWARE.
  * flags table in tr16.  The entries marked 9 in tr16 are continuation bytes
  * and are marked as length 1 here so that we can recover. */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_utf8skip[];
+    EXTCONST U8 PL_utf8skip[];
 #  else
-#    EXTCONST U8 PL_utf8skip[] = {
+    EXTCONST U8 PL_utf8skip[] = {
 /*     _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_*/  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
 /*1_*/  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
@@ -556,9 +556,9 @@ SOFTWARE.
 
 /* Index is EBCDIC 037 code point; value is its lowercase equivalent */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_latin1_lc[];
+    EXTCONST U8 PL_latin1_lc[];
 #  else
-#    EXTCONST U8 PL_latin1_lc[] = {
+    EXTCONST U8 PL_latin1_lc[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -584,9 +584,9 @@ SOFTWARE.
  * The 'mod' in the name means that codepoints whose uppercase is above 255 or
  * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_mod_latin1_uc[];
+    EXTCONST U8 PL_mod_latin1_uc[];
 #  else
-#    EXTCONST U8 PL_mod_latin1_uc[] = {
+    EXTCONST U8 PL_mod_latin1_uc[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -611,9 +611,9 @@ SOFTWARE.
 /* Index is EBCDIC 037 code point; For A-Z, value is a-z; for a-z, value
  * is A-Z; all other code points map to themselves */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_fold[];
+    EXTCONST U8 PL_fold[];
 #  else
-#    EXTCONST U8 PL_fold[] = {
+    EXTCONST U8 PL_fold[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -639,9 +639,9 @@ SOFTWARE.
  * (A => a; a => A, etc) in the 0-255 range.  If no such equivalent, value is
  * the code point itself */
 #  ifndef DOINIT
-#    EXTCONST U8 PL_fold_latin1[];
+    EXTCONST U8 PL_fold_latin1[];
 #  else
-#    EXTCONST U8 PL_fold_latin1[] = {
+    EXTCONST U8 PL_fold_latin1[] = {
 /*      _0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/
 /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
@@ -670,9 +670,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U8 PL_extended_utf8_dfa_tab[];
+    EXTCONST U8 PL_extended_utf8_dfa_tab[];
 #  else
-#    EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
+    EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
 /*         _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_    */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_    */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -711,9 +711,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U16 PL_strict_utf8_dfa_tab[];
+    EXTCONST U16 PL_strict_utf8_dfa_tab[];
 #  else
-#    EXTCONST U16 PL_strict_utf8_dfa_tab[] = {
+    EXTCONST U16 PL_strict_utf8_dfa_tab[] = {
 /*          _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_     */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_     */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -757,9 +757,9 @@ SOFTWARE.
  */
 
 #  ifndef DOINIT
-#    EXTCONST U8 PL_c9_utf8_dfa_tab[];
+    EXTCONST U8 PL_c9_utf8_dfa_tab[];
 #  else
-#    EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
+    EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
 /*        _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E _F*/
 /*0_   */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 /*1_   */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
diff --git a/pod/perlop.pod b/pod/perlop.pod
index af695b678f..dd658bf5fb 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -2211,6 +2211,10 @@ Examples:
 
     s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields
 
+    $foo !~ s/A/a/g;    # Lowercase all A's in $foo; return
+                        # 0 if any were found and changed;
+                        # otherwise return 1
+
 Note the use of C<$> instead of C<\> in the last example.  Unlike
 B<sed>, we use the \<I<digit>> form only in the left hand side.
 Anywhere else it's $<I<digit>>.
@@ -2405,10 +2409,14 @@ X<tr> X<y> X<transliterate> X</c> X</d> X</s>
 
 =item C<y/I<SEARCHLIST>/I<REPLACEMENTLIST>/cdsr>
 
-Transliterates all occurrences of the characters found in the search list
-with the corresponding character in the replacement list.  It returns
-the number of characters replaced or deleted.  If no string is
-specified via the C<=~> or C<!~> operator, the C<$_> string is transliterated.
+Transliterates all occurrences of the characters found (or not found
+if the C</c> modifier is specified) in the search list with the
+positionally corresponding character in the replacement list, possibly
+deleting some, depending on the modifiers specified.  It returns the
+number of characters replaced or deleted.  If no string is specified via
+the C<=~> or C<!~> operator, the C<$_> string is transliterated.
+
+For B<sed> devotees, C<y> is provided as a synonym for C<tr>.
 
 If the C</r> (non-destructive) option is present, a new copy of the string
 is made and its characters transliterated, and this copy is returned no
@@ -2428,20 +2436,18 @@ Otherwise, a character range may be specified with a hyphen, so
 C<tr/A-J/0-9/> does the same replacement as
 C<tr/ACEGIBDFHJ/0246813579/>.
 
-For B<sed> devotees, C<y> is provided as a synonym for C<tr>.
-
 If the I<SEARCHLIST> is delimited by bracketing quotes, the
 I<REPLACEMENTLIST> must have its own pair of quotes, which may or may
 not be bracketing quotes; for example, C<tr[aeiouy][yuoiea]> or
 C<tr(+\-*/)/ABCD/>.
 
-Characters may be literals or (if the delimiters aren't single quotes)
+Characters may be literals, or (if the delimiters aren't single quotes)
 any of the escape sequences accepted in double-quoted strings.  But
 there is never any variable interpolation, so C<"$"> and C<"@"> are
-treated as literals.  A hyphen at the beginning or end, or preceded by a
-backslash is considered a literal.  Escape sequence details are in L<the
-table near the beginning of this section|/Quote and Quote-like
-Operators>.
+always treated as literals.  A hyphen at the beginning or end, or
+preceded by a backslash is also always considered a literal.  Escape
+sequence details are in L<the table near the beginning of this
+section|/Quote and Quote-like Operators>.
 
 Note that C<tr> does B<not> do regular expression character classes such as
 C<\d> or C<\pL>.  The C<tr> operator is not equivalent to the C<L<tr(1)>>
@@ -2480,85 +2486,128 @@ range's end points are expressed as C<\N{...}>
 removes from C<$string> all the platform's characters which are
 equivalent to any of Unicode U+0020, U+0021, ... U+007D, U+007E.  This
 is a portable range, and has the same effect on every platform it is
-run on.  It turns out that in this example, these are the ASCII
+run on.  In this example, these are the ASCII
 printable characters.  So after this is run, C<$string> has only
 controls and characters which have no ASCII equivalents.
 
 But, even for portable ranges, it is not generally obvious what is
-included without having to look things up.  A sound principle is to use
-only ranges that both begin from and end at either ASCII alphabetics of
-equal case (C<b-e>, C<B-E>), or digits (C<1-4>).  Anything else is
-unclear (and unportable unless C<\N{...}> is used).  If in doubt, spell
-out the character sets in full.
+included without having to look things up in the manual.  A sound
+principle is to use only ranges that both begin from, and end at, either
+ASCII alphabetics of equal case (C<b-e>, C<B-E>), or digits (C<1-4>).
+Anything else is unclear (and unportable unless C<\N{...}> is used).  If
+in doubt, spell out the character sets in full.
 
 Options:
 
     c  Complement the SEARCHLIST.
     d  Delete found but unreplaced characters.
-    s  Squash duplicate replaced characters.
     r  Return the modified string and leave the original string
        untouched.
+    s  Squash duplicate replaced characters.
 
-If the C</c> modifier is specified, the I<SEARCHLIST> character set
-is complemented. So for example these two are equivalent (the exact
-maximum number will depend on your platform):
-
-    tr/\x00-\xfd/ABCD/c
-    tr/\xfe-\x{7fffffff}/ABCD/
+If the C</d> modifier is specified, any characters specified by
+I<SEARCHLIST>  not found in I<REPLACEMENTLIST> are deleted.  (Note that
+this is slightly more flexible than the behavior of some B<tr> programs,
+which delete anything they find in the I<SEARCHLIST>, period.)
 
-If the C</d> modifier is specified, any characters
-specified by I<SEARCHLIST> not found in I<REPLACEMENTLIST> are deleted.
-(Note that this is slightly more flexible than the behavior of some
-B<tr> programs, which delete anything they find in the I<SEARCHLIST>,
-period.)
+If the C</s> modifier is specified, sequences of characters, all in a
+row, that were transliterated to the same character are squashed down to
+a single instance of that character.
 
-If the C</s> modifier is specified, runs of the same character in the
-result, where each those characters were substituted by the
-transliteration, are squashed down to a single instance of the character.
+ my $a = "aaaba";
+ $a =~ tr/a/a/s;    # $a now is "aba"
 
 If the C</d> modifier is used, the I<REPLACEMENTLIST> is always interpreted
 exactly as specified.  Otherwise, if the I<REPLACEMENTLIST> is shorter
-than the I<SEARCHLIST>, the final character is replicated till it is long
-enough.  If the I<REPLACEMENTLIST> is empty, the I<SEARCHLIST> is replicated.
-This latter is useful for counting characters in a class or for
-squashing character sequences in a class. For example, each of these pairs
-are equivalent:
+than the I<SEARCHLIST>, the final character, if any, is replicated until
+it is long enough.  There won't be a final character if and only if the
+I<REPLACEMENTLIST> is empty, in which case I<REPLACEMENTLIST> is
+copied from I<SEARCHLIST>.  An empty I<REPLACEMENTLIST> is useful
+for counting characters in a class, or for squashing character sequences
+in a class.
 
     tr/abcd//            tr/abcd/abcd/
     tr/abcd/AB/          tr/abcd/ABBB/
     tr/abcd//d           s/[abcd]//g
     tr/abcd/AB/d         (tr/ab/AB/ + s/[cd]//g)  - but run together
 
+If the C</c> modifier is specified, the characters to be transliterated
+are the ones NOT in I<SEARCHLIST>, that is, it is complemented.  If
+C</d> and/or C</s> are also specified, they apply to the complemented
+I<SEARCHLIST>.  Recall that if I<REPLACEMENTLIST> is empty (except
+under C</d>) a copy of I<SEARCHLIST> is used instead.  That copy is made
+after complementing under C</c>.  I<SEARCHLIST> is sorted by code point
+order after complementing, and any I<REPLACEMENTLIST>  is applied to
+that sorted result.  This means that under C</c>, the order of the
+characters specified in I<SEARCHLIST> is irrelevant.  This can
+lead to different results on EBCDIC systems if I<REPLACEMENTLIST>
+contains more than one character, hence it is generally non-portable to
+use C</c> with such a I<REPLACEMENTLIST>.
+
+Another way of describing the operation is this:
+If C</c> is specified, the I<SEARCHLIST> is sorted by code point order,
+then complemented.  If I<REPLACEMENTLIST> is empty and C</d> is not
+specified, I<REPLACEMENTLIST> is replaced by a copy of I<SEARCHLIST> (as
+modified under C</c>), and these potentially modified lists are used as
+the basis for what follows.  Any character in the target string that
+isn't in I<SEARCHLIST> is passed through unchanged.  Every other
+character in the target string is replaced by the character in
+I<REPLACEMENTLIST> that positionally corresponds to its mate in
+I<SEARCHLIST>, except that under C</s>, the 2nd and following characters
+are squeezed out in a sequence of characters in a row that all translate
+to the same character.  If I<SEARCHLIST> is longer than
+I<REPLACEMENTLIST>, characters in the target string that match a
+character in I<SEARCHLIST> that doesn't have a correspondence in
+I<REPLACEMENTLIST> are either deleted from the target string if C</d> is
+specified; or replaced by the final character in I<REPLACEMENTLIST> if
+C</d> isn't specified.
+
 Some examples:
 
-    $ARGV[1] =~ tr/A-Z/a-z/;   # canonicalize to lower case ASCII
+ $ARGV[1] =~ tr/A-Z/a-z/;   # canonicalize to lower case ASCII
+
+ $cnt = tr/*/*/;            # count the stars in $_
+ $cnt = tr/*//;             # same thing
+
+ $cnt = $sky =~ tr/*/*/;    # count the stars in $sky
+ $cnt = $sky =~ tr/*//;     # same thing
 
-    $cnt = tr/*/*/;            # count the stars in $_
+ $cnt = $sky =~ tr/*//c;    # count all the non-stars in $sky
+ $cnt = $sky =~ tr/*/*/c;   # same, but transliterate each non-star
+                            # into a star, leaving the already-stars
+                            # alone.  Afterwards, everything in $sky
+                            # is a star.
 
-    $cnt = $sky =~ tr/*/*/;    # count the stars in $sky
+ $cnt = tr/0-9//;           # count the ASCII digits in $_
 
-    $cnt = tr/0-9//;           # count the digits in $_
+ tr/a-zA-Z//s;              # bookkeeper -> bokeper
+ tr/o/o/s;                  # bookkeeper -> bokkeeper
+ tr/oe/oe/s;                # bookkeeper -> bokkeper
+ tr/oe//s;                  # bookkeeper -> bokkeper
+ tr/oe/o/s;                 # bookkeeper -> bokkopor
 
-    tr/a-zA-Z//s;              # bookkeeper -> bokeper
+ ($HOST = $host) =~ tr/a-z/A-Z/;
+  $HOST = $host  =~ tr/a-z/A-Z/r; # same thing
 
-    ($HOST = $host) =~ tr/a-z/A-Z/;
-     $HOST = $host  =~ tr/a-z/A-Z/r;   # same thing
+ $HOST = $host =~ tr/a-z/A-Z/r   # chained with s///r
+               =~ s/:/ -p/r;
 
-    $HOST = $host =~ tr/a-z/A-Z/r    # chained with s///r
-                  =~ s/:/ -p/r;
+ tr/a-zA-Z/ /cs;                 # change non-alphas to single space
 
-    tr/a-zA-Z/ /cs;            # change non-alphas to single space
+ @stripped = map tr/a-zA-Z/ /csr, @original;
+                                 # /r with map
 
-    @stripped = map tr/a-zA-Z/ /csr, @original;
-                               # /r with map
+ tr [\200-\377]
+    [\000-\177];                 # wickedly delete 8th bit
 
-    tr [\200-\377]
-       [\000-\177];            # wickedly delete 8th bit
+ $foo !~ tr/A/a/    # transliterate all the A's in $foo to 'a',
+                    # return 0 if any were found and changed.
+                    # Otherwise return 1
 
 If multiple transliterations are given for a character, only the
 first one is used:
 
-    tr/AAA/XYZ/
+ tr/AAA/XYZ/
 
 will transliterate any A to X.
 
@@ -2567,10 +2616,10 @@ the I<SEARCHLIST> nor the I<REPLACEMENTLIST> are subjected to double quote
 interpolation.  That means that if you want to use variables, you
 must use an C<eval()>:
 
-    eval "tr/$oldlist/$newlist/";
-    die $@ if $@;
+ eval "tr/$oldlist/$newlist/";
+ die $@ if $@;
 
-    eval "tr/$oldlist/$newlist/, 1" or die $@;
+ eval "tr/$oldlist/$newlist/, 1" or die $@;
 
 =item C<< <<I<EOF> >>
 X<here-doc> X<heredoc> X<here-document> X<<< << >>>
diff --git a/regen/ebcdic.pl b/regen/ebcdic.pl
index 0e40b13204..cfb4d4ea07 100644
--- a/regen/ebcdic.pl
+++ b/regen/ebcdic.pl
@@ -51,6 +51,22 @@ sub get_column_headers ($$;$) {
     return $header . "*/\n";
 }
 
+sub output_table_start($$$) {
+    my ($out_fh, $TYPE, $name) = @_;
+
+    my $declaration = "EXTCONST $TYPE $name\[\]";
+    print $out_fh <<EOF;
+#  ifndef DOINIT
+    $declaration;
+#  else
+    $declaration = {
+EOF
+}
+
+sub output_table_end($) {
+    print $out_fh "};\n#  endif\n\n";
+}
+
 sub output_table ($$;$) {
     my $table_ref = shift;
     my $name = shift;
@@ -124,13 +140,7 @@ EOF
     my $TYPE = 'U8';
     $TYPE = 'U16' if grep { $_ > 255 } @$table_ref;
 
-    my $declaration = "EXTCONST $TYPE $name\[\]";
-    print $out_fh <<EOF;
-#  ifndef DOINIT
-#    $declaration;
-#  else
-#    $declaration = {
-EOF
+    output_table_start $out_fh, $TYPE, $name;
 
     # First the headers for the columns
     print $out_fh get_column_headers($row_hdr_length, $field_width);
@@ -192,7 +202,7 @@ EOF
     print $out_fh get_column_headers($row_hdr_length, $field_width,
                                      ($is_dfa) ? $columns_after_256 : undef);
 
-    print $out_fh "};\n#  endif\n\n";
+    output_table_end($out_fh);
 }
 
 print $out_fh <<'END';

-- 
Perl5 Master Repository

[perl.git] branch blead updated. v5.29.8-24-g966b4e4752

Reply via email to