In perl.git, the branch khw/ebcdic has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/92a582c92e80ae80074bbc0f664eb20ccc09e451?hp=dec54ad7882c533207311a605e83dfd8e8175965>

- Log -----------------------------------------------------------------
commit 92a582c92e80ae80074bbc0f664eb20ccc09e451
Author: Karl Williamson <[email protected]>
Date:   Fri Mar 1 08:28:52 2013 -0700

    utf8.h: Simplify UTF8_EIGHT_BIT_foo on EBCDIC
    
    These macros were previously defined in terms of UTF8_TWO_BYTE_HI and
    UTF8_TWO_BYTE_LO.  But the EIGHT_BIT versions can use the less general
    and simpler NATIVE_TO_LATIN1 instead of NATIVE_TO_UNI because the input
    domain is restricted in the EIGHT_BIT.  Note that on ASCII platforms,
    these both expand to the same thing, so the difference matters only on
    EBCDIC.
-----------------------------------------------------------------------

Summary of changes:
 utf8.h |   13 ++++++++-----
 1 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/utf8.h b/utf8.h
index eef2614..a83f33e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -352,11 +352,14 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 #define UTF8_TWO_BYTE_HI(c)    ((U8) (UTF8_TWO_BYTE_HI_nocast(c)))
 #define UTF8_TWO_BYTE_LO(c)    ((U8) (UTF8_TWO_BYTE_LO_nocast(c)))
 
-/* This name is used when the source is a single byte.  For EBCDIC these could
- * be more efficiently written; the reason is that things above 0xFF have to be
- * special-cased, which is done by the EBCDIC version of NATIVE_TO_UNI() */
-#define UTF8_EIGHT_BIT_HI(c)   UTF8_TWO_BYTE_HI((U8)(c))
-#define UTF8_EIGHT_BIT_LO(c)   UTF8_TWO_BYTE_LO((U8)(c))
+/* This name is used when the source is a single byte (input not checked).
+ * These expand identically to the TWO_BYTE versions on ASCII platforms, but
+ * use to/from LATIN1 instead of UNI on EBCDIC, which eliminates tests */
+#define UTF8_EIGHT_BIT_HI(c)   I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c)          \
+                        >> UTF_ACCUMULATION_SHIFT) | (0xFF & UTF_START_MARK(2)))
+#define UTF8_EIGHT_BIT_LO(c)   I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c)          \
+                                                  & UTF_CONTINUATION_MASK)      \
+                                                | UTF_CONTINUATION_MARK)
 
 /*
  * 'UTF' is whether or not p is encoded in UTF8.  The names 'foo_lazy_if' stem

--
Perl5 Master Repository

Reply via email to