https://github.com/python/cpython/commit/987e45e6326c6174fb7a300f44b9d8e4e26370c9
commit: 987e45e6326c6174fb7a300f44b9d8e4e26370c9
branch: main
author: Petr Viktorin <encu...@gmail.com>
committer: encukou <encu...@gmail.com>
date: 2025-05-02T18:30:40+02:00
summary:

gh-128972: Add `_Py_ALIGN_AS` and revert `PyASCIIObject` memory layout. (GH-133085)

Add `_Py_ALIGN_AS` as per C API WG vote: 
https://github.com/capi-workgroup/decisions/issues/61
This patch only adds it to free-threaded builds; the `#ifdef Py_GIL_DISABLED`
can be removed in the future.

Use this to revert `PyASCIIObject` memory layout for non-free-threaded builds.
The long-term plan is to deprecate the entire struct; until that happens
it's better to keep it unchanged, as courtesy to people that rely on it despite
it not being stable ABI.

files:
A Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst
M Include/cpython/unicodeobject.h
M Include/pymacro.h
M Objects/unicodeobject.c

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index e8b04d158b0805..136f5d5c5f8425 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -99,6 +99,11 @@ typedef struct {
     PyObject_HEAD
     Py_ssize_t length;          /* Number of code points in the string */
     Py_hash_t hash;             /* Hash value; -1 if not set */
+#ifdef Py_GIL_DISABLED
+    /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
+       In the non-free-threaded build, we'll use explicit padding instead */
+   _Py_ALIGN_AS(4)
+#endif
     struct {
         /* If interned is non-zero, the two references from the
            dictionary to this object are *not* counted in ob_refcnt.
@@ -109,7 +114,12 @@ typedef struct {
                3: Interned, Immortal, and Static
            This categorization allows the runtime to determine the right
            cleanup mechanism at runtime shutdown. */
-        uint16_t interned;
+#ifdef Py_GIL_DISABLED
+        // Needs to be accessed atomically, so can't be a bit field.
+        unsigned char interned;
+#else
+        unsigned int interned:2;
+#endif
         /* Character size:
 
            - PyUnicode_1BYTE_KIND (1):
@@ -132,23 +142,23 @@ typedef struct {
              * all characters are in the range U+0000-U+10FFFF
              * at least one character is in the range U+10000-U+10FFFF
          */
-        unsigned short kind:3;
+        unsigned int kind:3;
         /* Compact is with respect to the allocation scheme. Compact unicode
            objects only require one memory block while non-compact objects use
            one block for the PyUnicodeObject struct and another for its data
            buffer. */
-        unsigned short compact:1;
+        unsigned int compact:1;
         /* The string only contains characters in the range U+0000-U+007F (ASCII)
            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
            set, use the PyASCIIObject structure. */
-        unsigned short ascii:1;
+        unsigned int ascii:1;
         /* The object is statically allocated. */
-        unsigned short statically_allocated:1;
+        unsigned int statically_allocated:1;
+#ifndef Py_GIL_DISABLED
         /* Padding to ensure that PyUnicode_DATA() is always aligned to
-           4 bytes (see issue #19537 on m68k) and we use unsigned short to avoid
-           the extra four bytes on 32-bit Windows. This is restricted features
-           for specific compilers including GCC, MSVC, Clang and IBM's XL compiler. */
-        unsigned short :10;
+           4 bytes (see issue gh-63736 on m68k) */
+        unsigned int :24;
+#endif
     } state;
 } PyASCIIObject;
 
@@ -198,7 +208,7 @@ typedef struct {
 /* Use only if you know it's a string */
 static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
 #ifdef Py_GIL_DISABLED
-    return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned);
+    return _Py_atomic_load_uint8_relaxed(&_PyASCIIObject_CAST(op)->state.interned);
 #else
     return _PyASCIIObject_CAST(op)->state.interned;
 #endif
diff --git a/Include/pymacro.h b/Include/pymacro.h
index a82f347866e8d0..218987a80b0d91 100644
--- a/Include/pymacro.h
+++ b/Include/pymacro.h
@@ -23,6 +23,47 @@
 #  define static_assert _Static_assert
 #endif
 
+
+// _Py_ALIGN_AS: this compiler's spelling of the `alignas` keyword.
+// We currently use alignas for free-threaded builds only; additional compat
+// checking would be great before we add it to the default build.
+// Standards/compiler support:
+// - `alignas` is a keyword in C23 and C++11.
+// - `_Alignas` is a keyword in C11
+// - GCC & clang have __attribute__((aligned))
+//   (use that for older standards in pedantic mode)
+// - MSVC has __declspec(align)
+// - `_Alignas` is a common C compiler extension
+// Older compilers may name it differently; to allow compilation on such
+// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
+// defined. Note that defining it wrong (including defining it to nothing) will
+// cause ABI incompatibilities.
+#ifdef Py_GIL_DISABLED
+#   ifndef _Py_ALIGN_AS
+#       ifdef __cplusplus
+#           if __cplusplus >= 201103L
+#               define _Py_ALIGN_AS(V) alignas(V)
+#           elif defined(__GNUC__) || defined(__clang__)
+#               define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
+#           elif defined(_MSC_VER)
+#               define _Py_ALIGN_AS(V) __declspec(align(V))
+#           else
+#               define _Py_ALIGN_AS(V) alignas(V)
+#           endif
+#       elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#           define _Py_ALIGN_AS(V) alignas(V)
+#       elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#           define _Py_ALIGN_AS(V) _Alignas(V)
+#       elif (defined(__GNUC__) || defined(__clang__))
+#           define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
+#       elif defined(_MSC_VER)
+#           define _Py_ALIGN_AS(V) __declspec(align(V))
+#       else
+#           define _Py_ALIGN_AS(V) _Alignas(V)
+#       endif
+#   endif
+#endif
+
 /* Minimum value between x and y */
 #define Py_MIN(x, y) (((x) > (y)) ? (y) : (x))
 
diff --git a/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst b/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst
new file mode 100644
index 00000000000000..4b6a6e3606ff16
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst
@@ -0,0 +1,3 @@
+For non-free-threaded builds, the memory layout of :c:struct:`PyASCIIObject`
+is reverted to match Python 3.13. (Note that the structure is not part of
+stable ABI and so its memory layout is *not guaranteed* to remain stable.)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 462323a828ef49..eb3e1c48fd4050 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15918,7 +15918,7 @@ immortalize_interned(PyObject *s)
         _Py_DecRefTotal(_PyThreadState_GET());
     }
 #endif
-    FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
+    FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
     _Py_SetImmortal(s);
 }
 
@@ -16036,7 +16036,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
         Py_DECREF(s);
         Py_DECREF(s);
     }
-    FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
+    FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
 
     /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */
 
@@ -16172,7 +16172,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
             Py_UNREACHABLE();
         }
         if (!shared) {
-            FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
+            FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED);
         }
     }
 #ifdef INTERNED_STATS

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to