https://github.com/python/cpython/commit/7043bbd1ca6f0d84ad2211dd60114535ba3d51fc
commit: 7043bbd1ca6f0d84ad2211dd60114535ba3d51fc
branch: main
author: Inada Naoki <[email protected]>
committer: methane <[email protected]>
date: 2024-11-30T21:52:37+09:00
summary:
gh-127417: fix UTF-8 decoder optimization on AIX (#127433)
files:
M Objects/unicodeobject.c
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ab4f07ed054385..33fa21d4c7d1bf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5014,21 +5014,26 @@ ctz(size_t v)
#endif /* SIZEOF_SIZE_T */
return pos;
}
+#else
+#define HAVE_CTZ 0
#endif
-#if HAVE_CTZ
-// load p[0]..p[size-1] as a little-endian size_t
-// without unaligned access nor read ahead.
+#if HAVE_CTZ && PY_LITTLE_ENDIAN
+// load p[0]..p[size-1] as a size_t without unaligned access nor read ahead.
static size_t
load_unaligned(const unsigned char *p, size_t size)
{
- assert(size <= SIZEOF_SIZE_T);
union {
size_t s;
unsigned char b[SIZEOF_SIZE_T];
} u;
u.s = 0;
+ // This switch statement assumes little endian because:
+ // * union is faster than bitwise or and shift.
+ // * big endian machine is rare and hard to maintain.
switch (size) {
+ default:
+#if SIZEOF_SIZE_T == 8
case 8:
u.b[7] = p[7];
_Py_FALLTHROUGH;
@@ -5041,6 +5046,7 @@ load_unaligned(const unsigned char *p, size_t size)
case 5:
u.b[4] = p[4];
_Py_FALLTHROUGH;
+#endif
case 4:
u.b[3] = p[3];
_Py_FALLTHROUGH;
@@ -5055,8 +5061,6 @@ load_unaligned(const unsigned char *p, size_t size)
break;
case 0:
break;
- default:
- Py_UNREACHABLE();
}
return u.s;
}
@@ -5077,8 +5081,8 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
if (end - start >= SIZEOF_SIZE_T) {
const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
+#if PY_LITTLE_ENDIAN && HAVE_CTZ
if (p < p2) {
-#if HAVE_CTZ
#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
// x86 and amd64 are little endian and can load unaligned memory.
size_t u = *(const size_t*)p & ASCII_CHAR_MASK;
@@ -5086,11 +5090,11 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
#endif
if (u) {
- return p - start + (ctz(u) - 7) / 8;
+ return (ctz(u) - 7) / 8;
}
p = p2;
}
-#else
+#else /* PY_LITTLE_ENDIAN && HAVE_CTZ */
while (p < p2) {
if (*p & 0x80) {
return p - start;
@@ -5113,7 +5117,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
p += SIZEOF_SIZE_T;
}
}
-#if HAVE_CTZ
+#if PY_LITTLE_ENDIAN && HAVE_CTZ
// we can not use *(const size_t*)p to avoid buffer overrun.
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK;
if (u) {
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]