In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/0ed2b00b2bd6a650391433ad8733983692df43d5?hp=f2645549e6a4bfca055b5fd1932da462df424921>

- Log -----------------------------------------------------------------
commit 0ed2b00b2bd6a650391433ad8733983692df43d5
Author: Karl Williamson <[email protected]>
Date:   Mon May 12 18:29:41 2014 -0600

    Fix definition of toCTRL() for EBCDIC
    
    The definition was incorrect.  When going from control to printable
    name, we need to go from Latin1 -> Native, so that e.g., a 65 gets
    turned into the native 'A'

M       handy.h
M       utf8.h
M       utfebcdic.h

commit dd9bc2b0af8e838ed989897601a0ee36eeed092f
Author: Karl Williamson <[email protected]>
Date:   Tue May 6 13:18:28 2014 -0600

    Add some (UN)?LIKELY() to UTF8 handling
    
    It's very rare actually for code to be presented with malformed UTF-8,
    so give the compiler a hint about the likely branches.

M       regcharclass.h
M       regen/regcharclass.pl
M       utf8.h
-----------------------------------------------------------------------

Summary of changes:
 handy.h               | 12 +++++++-----
 regcharclass.h        | 16 ++++++++--------
 regen/regcharclass.pl |  7 ++++---
 utf8.h                | 10 +++++++---
 utfebcdic.h           |  4 ++++
 5 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/handy.h b/handy.h
index 13f8d22..740ebe5 100644
--- a/handy.h
+++ b/handy.h
@@ -1653,11 +1653,13 @@ EXTCONST U32 PL_charclass[];
 #ifndef EBCDIC
 #  define toCTRL(c)    (toUPPER(c) ^ 64)
 #else
-#  define toCTRL(c)    ((c) == '?'                               \
-                        ? LATIN1_TO_NATIVE(0x9F)                 \
-                        : (c) == LATIN1_TO_NATIVE(0x9F)          \
-                          ? '?'                                  \
-                          : (NATIVE_TO_LATIN1(toUPPER(c)) ^ 64))
+#  define toCTRL(c)    ((isPRINT_A(c))                          \
+                       ? UNLIKELY((c) == '?')                   \
+                         ? QUESTION_MARK_CTRL                   \
+                         : (NATIVE_TO_LATIN1(toUPPER(c)) ^ 64)  \
+                       : UNLIKELY((c) == QUESTION_MARK_CTRL)    \
+                         ? ((c) == '?')                         \
+                         : (LATIN1_TO_NATIVE((c) ^ 64)))
 #endif
 
 /* Line numbers are unsigned, 32 bits. */
diff --git a/regcharclass.h b/regcharclass.h
index ebda2f7..7de537b 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -708,11 +708,11 @@
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
-( ((e) > (s)) ?                                                             \
+( ( LIKELY((e) > (s)) ) ?                                                   \
     ( ( ( 0x09 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x20 == ((U8*)s)[0] 
) ? 1\
     : (! is_utf8 ) ?                                                        \
            ( 0x85 == ((U8*)s)[0] )                                         \
-       : (((e) - (s)) >= UTF8SKIP(s)) ?                                    \
+       : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                          \
            ( ( 0xC2 == ((U8*)s)[0] ) ?                                     \
                ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 )                         \
            : ( ( ( 0xE2 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( ( 
((U8*)s)[2] & 0xFE ) == 0x8E || ( ((U8*)s)[2] & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\
@@ -1427,9 +1427,9 @@
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
-( ((e) > (s)) ?                                                             \
+( ( LIKELY((e) > (s)) ) ?                                                   \
     ( ( ( ( ((U8*)s)[0] & 0xEF ) == 0x05 ) || ((U8*)s)[0] == 0x0B || ( ( 
((U8*)s)[0] & 0xFE ) == 0x0C ) || ((U8*)s)[0] == 0x25 || ((U8*)s)[0] == 0x40 ) 
? 1\
-    : ( ( is_utf8 && (((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == ((U8*)s)[0] ) 
) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
+    : ( ( is_utf8 && LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == 
((U8*)s)[0] ) ) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
                    ( ( 0x55 == ((U8*)s)[2] || 0x56 == ((U8*)s)[2] ) ? 3 : 0 )\
                : ( ( 0x42 == ((U8*)s)[1] ) && ( 0x49 == ((U8*)s)[2] || 0x4A == 
((U8*)s)[2] ) ) ? 3 : 0 ) : 0 )\
 : 0 )
@@ -2152,9 +2152,9 @@
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
-( ((e) > (s)) ?                                                             \
+( ( LIKELY((e) > (s)) ) ?                                                   \
     ( ( ( ( ((U8*)s)[0] & 0xEF ) == 0x05 ) || ((U8*)s)[0] == 0x0B || ( ( 
((U8*)s)[0] & 0xFE ) == 0x0C ) || ((U8*)s)[0] == 0x25 || ((U8*)s)[0] == 0x40 ) 
? 1\
-    : ( ( is_utf8 && (((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == ((U8*)s)[0] ) 
) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
+    : ( ( is_utf8 && LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == 
((U8*)s)[0] ) ) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
                    ( ( 0x55 == ((U8*)s)[2] || 0x56 == ((U8*)s)[2] ) ? 3 : 0 )\
                : ( ( 0x42 == ((U8*)s)[1] ) && ( 0x49 == ((U8*)s)[2] || 0x4A == 
((U8*)s)[2] ) ) ? 3 : 0 ) : 0 )\
 : 0 )
@@ -2877,9 +2877,9 @@
 */
 /*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
-( ((e) > (s)) ?                                                             \
+( ( LIKELY((e) > (s)) ) ?                                                   \
     ( ( ( ( ((U8*)s)[0] & 0xEF ) == 0x05 ) || ((U8*)s)[0] == 0x0B || ( ( 
((U8*)s)[0] & 0xFE ) == 0x0C ) || ((U8*)s)[0] == 0x25 || ((U8*)s)[0] == 0x40 ) 
? 1\
-    : ( ( is_utf8 && (((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == ((U8*)s)[0] ) 
) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
+    : ( ( is_utf8 && LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xCA == 
((U8*)s)[0] ) ) ? ( ( 0x41 == ((U8*)s)[1] ) ?\
                    ( ( ( ((U8*)s)[2] & 0xFE ) == 0x56 ) ? 3 : 0 )          \
                : ( ( 0x42 == ((U8*)s)[1] ) && ( 0x49 == ((U8*)s)[2] || 0x51 == 
((U8*)s)[2] ) ) ? 3 : 0 ) : 0 )\
 : 0 )
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 1f453e8..7919041 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -671,8 +671,9 @@ sub length_optree {
             # have only a few things that can match past this, so I (khw)
             # don't think it is worth it.  (Even better would be to use
             # calculate_mask(keys %$utf8) instead of UTF8_IS_START, and use it
-            # if it saves a bunch.
-            my $cond = "(((e) - (s)) >= UTF8SKIP(s))";
+            # if it saves a bunch.  We assume that input text likely to be
+            # well-formed .
+            my $cond = "LIKELY(((e) - (s)) >= UTF8SKIP(s))";
             $else = __cond_join($cond, $utf8, $else);
 
             # For 'generic', we also will want the latin1 UTF-8 variants for
@@ -715,7 +716,7 @@ sub length_optree {
             }
 
             # We need at least one byte available to start off the tests
-            $else = __cond_join("((e) > (s))", $else, 0);
+            $else = __cond_join("LIKELY((e) > (s))", $else, 0);
         }
         else {  # Here, we don't want or there aren't any variants.  A single
                 # byte available is enough.
diff --git a/utf8.h b/utf8.h
index 74e7d48..8945663 100644
--- a/utf8.h
+++ b/utf8.h
@@ -270,6 +270,10 @@ Perl's extended UTF-8 means we can have start bytes up to 
FF.
 #error UTF8_MAXBYTES must be at least 12
 #endif
 
+/* ^? is defined to be DEL on ASCII systems.  See the definition of toCTRL()
+ * for more */
+#define QUESTION_MARK_CTRL  DEL_NATIVE
+
 #define MAX_UTF8_TWO_BYTE 0x7FF
 
 #define UTF8_MAXBYTES_CASE     UTF8_MAXBYTES
@@ -639,13 +643,13 @@ machines) is a valid UTF-8 character.
 =cut
 */
 
-#define isUTF8_CHAR(s, e)   (((e) <= (s))                                   \
+#define isUTF8_CHAR(s, e)   (UNLIKELY((e) <= (s))                           \
                              ? 0                                            \
                              : (UTF8_IS_INVARIANT(*s))                      \
                                ? 1                                          \
-                               : (((e) - (s)) < UTF8SKIP(s))                \
+                               : UNLIKELY(((e) - (s)) < UTF8SKIP(s))        \
                                  ? 0                                        \
-                                 : (IS_UTF8_CHAR_FAST(UTF8SKIP(s)))         \
+                                 : LIKELY(IS_UTF8_CHAR_FAST(UTF8SKIP(s)))   \
                                    ? is_UTF8_CHAR_utf8_no_length_checks(s)  \
                                    : _is_utf8_char_slow(s, e))
 
diff --git a/utfebcdic.h b/utfebcdic.h
index 1211c9f..7eec66f 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -194,6 +194,10 @@ END_EXTERN_C
  * character occupies 5 bytes, therefore this number is 15 */
 #define UTF8_MAXBYTES_CASE     15
 
+/* ^? is defined to be APC on EBCDIC systems.  See the definition of toCTRL()
+ * for more */
+#define QUESTION_MARK_CTRL   LATIN1_TO_NATIVE(0x9F)
+
 #define MAX_UTF8_TWO_BYTE 0x3FF
 
 /*

--
Perl5 Master Repository

Reply via email to