In perl.git, the branch khw/ebcdic has been updated <http://perl5.git.perl.org/perl.git/commitdiff/92a582c92e80ae80074bbc0f664eb20ccc09e451?hp=dec54ad7882c533207311a605e83dfd8e8175965>
- Log ----------------------------------------------------------------- commit 92a582c92e80ae80074bbc0f664eb20ccc09e451 Author: Karl Williamson <[email protected]> Date: Fri Mar 1 08:28:52 2013 -0700 utf8.h: Simplify UTF8_EIGHT_BIT_foo on EBCDIC These macros were previously defined in terms of UTF8_TWO_BYTE_HI and UTF8_TWO_BYTE_LO. But the EIGHT_BIT versions can use the less general and simpler NATIVE_TO_LATIN1 instead of NATIVE_TO_UNI because the input domain is restricted in the EIGHT_BIT. Note that on ASCII platforms, these both expand to the same thing, so the difference matters only on EBCDIC. ----------------------------------------------------------------------- Summary of changes: utf8.h | 13 ++++++++----- 1 files changed, 8 insertions(+), 5 deletions(-) diff --git a/utf8.h b/utf8.h index eef2614..a83f33e 100644 --- a/utf8.h +++ b/utf8.h @@ -352,11 +352,14 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define UTF8_TWO_BYTE_HI(c) ((U8) (UTF8_TWO_BYTE_HI_nocast(c))) #define UTF8_TWO_BYTE_LO(c) ((U8) (UTF8_TWO_BYTE_LO_nocast(c))) -/* This name is used when the source is a single byte. For EBCDIC these could - * be more efficiently written; the reason is that things above 0xFF have to be - * special-cased, which is done by the EBCDIC version of NATIVE_TO_UNI() */ -#define UTF8_EIGHT_BIT_HI(c) UTF8_TWO_BYTE_HI((U8)(c)) -#define UTF8_EIGHT_BIT_LO(c) UTF8_TWO_BYTE_LO((U8)(c)) +/* This name is used when the source is a single byte (input not checked). + * These expand identically to the TWO_BYTE versions on ASCII platforms, but + * use to/from LATIN1 instead of UNI on EBCDIC, which eliminates tests */ +#define UTF8_EIGHT_BIT_HI(c) I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c) \ + >> UTF_ACCUMULATION_SHIFT) | (0xFF & UTF_START_MARK(2))) +#define UTF8_EIGHT_BIT_LO(c) I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c) \ + & UTF_CONTINUATION_MASK) \ + | UTF_CONTINUATION_MARK) /* * 'UTF' is whether or not p is encoded in UTF8. 
The names 'foo_lazy_if' stem -- Perl5 Master Repository
