In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/74665a894c3bc3da81a3d585a9c17a95b871a3bc?hp=f5fcf91032268f945100bcb5ea227e419712c779>
- Log ----------------------------------------------------------------- commit 74665a894c3bc3da81a3d585a9c17a95b871a3bc Author: Karl Williamson <[email protected]> Date: Thu Jul 28 11:47:59 2016 -0600 handy.h: Consolidate some EBCDIC vs ASCII paths This removes some '#ifdef EBCDIC' so as to make more code common between the platforms. This is at the expense of some efficiency, but the affected code only runs when compiling utilities, so ease of maintenance wins out. M handy.h commit 3f3c579de979613d3ed70c7fefd9641861dd2b72 Author: Karl Williamson <[email protected]> Date: Thu Jul 28 11:43:54 2016 -0600 handy.h: Add comment M handy.h commit 6c5b02ac7a9ff1c91f2ca46bedd89ba9012bb34f Author: Karl Williamson <[email protected]> Date: Thu Jul 28 11:40:50 2016 -0600 handy.h: Generate compile error if macros called wrong This extends the mechanism we added in 5.24 to more macros to make sure that a macro is called with an integer and not a pointer. It adds a "| 0" to the macro parameter, which is illegal if the parameter is a pointer. M handy.h commit 6f39eb491a3f31125ff9f8ab6b6a53e62255ac6c Author: Karl Williamson <[email protected]> Date: Thu Jul 28 11:39:40 2016 -0600 toke.c: White-space only re-wrap a comment. M toke.c ----------------------------------------------------------------------- Summary of changes: handy.h | 139 +++++++++++++++++++++++++++++++++------------------------------- toke.c | 6 +-- 2 files changed, 75 insertions(+), 70 deletions(-) diff --git a/handy.h b/handy.h index b1b50ff..b7cd9f5 100644 --- a/handy.h +++ b/handy.h @@ -910,7 +910,10 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc * of operands. Well, they are, but that is kind of the point. */ #ifndef __COVERITY__ -#define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || !(((WIDEST_UTYPE)(c)) & ~0xFF)) + /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a + * pointer) */ +#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ + || !(((WIDEST_UTYPE)(c | 0)) & ~0xFF)) #else #define FITS_IN_8_BITS(c) (1) #endif @@ -925,7 +928,16 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc /* There is a simple definition of ASCII for ASCII platforms. But the * EBCDIC one isn't so simple, so is defined using table look-up like the * other macros below. - * The '| 0' part ensures that c is an integer (and not e.g. a pointer) */ + * + * The cast here is used instead of '(c) >= 0', because some compilers emit + * a warning that that test is always true when the parameter is an + * unsigned type. khw supposes that it could be written as + * && ((c) == '\0' || (c) > 0) + * to avoid the message, but the cast will likely avoid extra branches even + * with stupid compilers. + * + * The '| 0' part ensures a compiler error if c is not integer (like e.g., + * a pointer) */ # define isASCII(c) ((WIDEST_UTYPE)((c) | 0) < 128) #endif @@ -1141,82 +1153,75 @@ END_EXTERN_C /* If we don't have perl.h, we are compiling a utility program. Below we * hard-code various macro definitions that wouldn't otherwise be available - * to it. Most are coded based on first principals. First some ones common - * to both ASCII and EBCDIC */ + * to it. Most are coded based on first principals. These are written to + * avoid EBCDIC vs. ASCII #ifdef's as much as possible. */ # define isDIGIT_A(c) ((c) <= '9' && (c) >= '0') # define isBLANK_A(c) ((c) == ' ' || (c) == '\t') -# define isSPACE_A(c) (isBLANK_A(c) \ - || (c) == '\n' \ - || (c) == '\r' \ - || (c) == '\v' \ +# define isSPACE_A(c) (isBLANK_A(c) \ + || (c) == '\n' \ + || (c) == '\r' \ + || (c) == '\v' \ || (c) == '\f') -# ifdef EBCDIC /* There are gaps between 'i' and 'j'; 'r' and 's'. Same - for uppercase. This is ordered to exclude most things - early */ -# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \ - && ((c) <= 'i' \ - || ((c) >= 'j' && (c) <= 'r') \ - || (c) >= 's')) -# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \ - && ((c) <= 'I' \ - || ((c) >= 'J' && (c) <= 'R') \ - || (c) >= 'S')) -# else /* ASCII platform. */ -# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z') -# define isUPPER_A(c) ((c) <= 'Z' && (c) >= 'A') -# endif - - /* Some more ASCII, non-ASCII common definitions */ + /* On EBCDIC, there are gaps between 'i' and 'j'; 'r' and 's'. Same for + * uppercase. The tests for those aren't necessary on ASCII, but hurt only + * performance (if optimization isn't on), and allow the same code to be + * used for both platform types */ +# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \ + && ( (c) <= 'i' \ + || ((c) >= 'j' && (c) <= 'r') \ + || (c) >= 's')) +# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \ + && ( (c) <= 'I' \ + || ((c) >= 'J' && (c) <= 'R') \ + || (c) >= 'S')) # define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c)) # define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c)) # define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_') # define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_') -# define isXDIGIT_A(c) (isDIGIT_A(c) \ - || ((c) >= 'a' && (c) <= 'f') \ +# define isXDIGIT_A(c) (isDIGIT_A(c) \ + || ((c) >= 'a' && (c) <= 'f') \ || ((c) <= 'F' && (c) >= 'A')) +# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \ + || (c) == '#' || (c) == '$' || (c) == '%' \ + || (c) == '&' || (c) == '\'' || (c) == '(' \ + || (c) == ')' || (c) == '*' || (c) == '+' \ + || (c) == ',' || (c) == '.' || (c) == '/' \ + || (c) == ':' || (c) == ';' || (c) == '<' \ + || (c) == '=' || (c) == '>' || (c) == '?' \ + || (c) == '@' || (c) == '[' || (c) == '\\' \ + || (c) == ']' || (c) == '^' || (c) == '_' \ + || (c) == '`' || (c) == '{' || (c) == '|' \ + || (c) == '}' || (c) == '~') +# define isGRAPH_A(c) (isALPHANUMERIC_A(c) || isPUNCT_A(c)) +# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ') # ifdef EBCDIC -# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \ - || (c) == '#' || (c) == '$' || (c) == '%' \ - || (c) == '&' || (c) == '\'' || (c) == '(' \ - || (c) == ')' || (c) == '*' || (c) == '+' \ - || (c) == ',' || (c) == '.' || (c) == '/' \ - || (c) == ':' || (c) == ';' || (c) == '<' \ - || (c) == '=' || (c) == '>' || (c) == '?' \ - || (c) == '@' || (c) == '[' || (c) == '\\' \ - || (c) == ']' || (c) == '^' || (c) == '_' \ - || (c) == '`' || (c) == '{' || (c) == '|' \ - || (c) == '}' || (c) == '~') -# define isGRAPH_A(c) (isALPHANUMERIC_A(c) || isPUNCT_A(c)) -# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ') - -# ifdef QUESTION_MARK_CTRL -# define _isQMC(c) ((c) == QUESTION_MARK_CTRL) -# else -# define _isQMC(c) 0 -# endif - - /* I (khw) can't think of a way to define all the ASCII controls - * without resorting to a libc (locale-sensitive) call. But we know - * that all controls but the question-mark one are in the range 0-0x3f. - * This makes sure that all the controls that have names are included, - * and all controls that are also considered ASCII in the locale. This - * may include more or fewer than what it actually should, but the - * wrong ones are less-important controls, so likely won't impact - * things (keep in mind that this is compiled only if perl.h isn't - * available). The question mark control is included if available */ -# define isCNTRL_A(c) (((c) < 0x40 && isascii(c)) \ - || (c) == '\0' || (c) == '\a' || (c) == '\b' \ - || (c) == '\f' || (c) == '\n' || (c) == '\r' \ - || (c) == '\t' || (c) == '\v' || _isQMC(c)) - + /* The below is accurate for the 3 EBCDIC code pages traditionally + * supported by perl. The only difference between them in the controls + * is the position of \n, and that is represented symbolically below */ +# define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \ + || (c) == '\f' || (c) == '\n' || (c) == '\r' \ + || (c) == '\t' || (c) == '\v' \ + || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \ + || (c) == 7 /* U+7F DEL */ \ + || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \ + /* DLE, DC[1-3] */ \ + || (c) == 0x18 /* U+18 CAN */ \ + || (c) == 0x19 /* U+19 EOM */ \ + || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \ + || (c) == 0x26 /* U+17 ETB */ \ + || (c) == 0x27 /* U+1B ESC */ \ + || (c) == 0x2D /* U+05 ENQ */ \ + || (c) == 0x2E /* U+06 ACK */ \ + || (c) == 0x32 /* U+16 SYN */ \ + || (c) == 0x37 /* U+04 EOT */ \ + || (c) == 0x3C /* U+14 DC4 */ \ + || (c) == 0x3D /* U+15 NAK */ \ + || (c) == 0x3F)/* U+1A SUB */ # define isASCII(c) (isCNTRL_A(c) || isPRINT_A(c)) -# else /* ASCII platform; things are simpler, and isASCII has already - been defined */ -# define isGRAPH_A(c) (((c) > ' ' && (c) < 127)) -# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ') -# define isPUNCT_A(c) (isGRAPH_A(c) && (! isALPHANUMERIC_A(c))) -# define isCNTRL_A(c) (isASCII(c) && (! isPRINT_A(c))) +# else /* isASCII is already defined for ASCII platforms, so can use that to + define isCNTRL */ +# define isCNTRL_A(c) (isASCII(c) && ! isPRINT_A(c)) # endif /* The _L1 macros may be unnecessary for the utilities; I (khw) added them diff --git a/toke.c b/toke.c index 2c87688..70449ca 100644 --- a/toke.c +++ b/toke.c @@ -3678,9 +3678,9 @@ S_scan_const(pTHX_ char *start) } else if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8. This is because - * \N{} implies Unicode semantics, and scalars have to - * be in utf8 to guarantee those semantics; but not - * needed in tr/// */ + * \N{} implies Unicode semantics, and scalars have + * to be in utf8 to guarantee those semantics; but + * not needed in tr/// */ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING); str = SvPV_const(res, len); } -- Perl5 Master Repository
