https://github.com/python/cpython/commit/987e45e6326c6174fb7a300f44b9d8e4e26370c9 commit: 987e45e6326c6174fb7a300f44b9d8e4e26370c9 branch: main author: Petr Viktorin <encu...@gmail.com> committer: encukou <encu...@gmail.com> date: 2025-05-02T18:30:40+02:00 summary:
gh-128972: Add `_Py_ALIGN_AS` and revert `PyASCIIObject` memory layout. (GH-133085) Add `_Py_ALIGN_AS` as per C API WG vote: https://github.com/capi-workgroup/decisions/issues/61 This patch only adds it to free-threaded builds; the `#ifdef Py_GIL_DISABLED` can be removed in the future. Use this to revert `PyASCIIObject` memory layout for non-free-threaded builds. The long-term plan is to deprecate the entire struct; until that happens it's better to keep it unchanged, as courtesy to people that rely on it despite it not being stable ABI. files: A Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst M Include/cpython/unicodeobject.h M Include/pymacro.h M Objects/unicodeobject.c diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index e8b04d158b0805..136f5d5c5f8425 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -99,6 +99,11 @@ typedef struct { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ Py_hash_t hash; /* Hash value; -1 if not set */ +#ifdef Py_GIL_DISABLED + /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. + In the non-free-threaded build, we'll use explicit padding instead */ + _Py_ALIGN_AS(4) +#endif struct { /* If interned is non-zero, the two references from the dictionary to this object are *not* counted in ob_refcnt. @@ -109,7 +114,12 @@ typedef struct { 3: Interned, Immortal, and Static This categorization allows the runtime to determine the right cleanup mechanism at runtime shutdown. */ - uint16_t interned; +#ifdef Py_GIL_DISABLED + // Needs to be accessed atomically, so can't be a bit field. 
+ unsigned char interned; +#else + unsigned int interned:2; +#endif /* Character size: - PyUnicode_1BYTE_KIND (1): @@ -132,23 +142,23 @@ typedef struct { * all characters are in the range U+0000-U+10FFFF * at least one character is in the range U+10000-U+10FFFF */ - unsigned short kind:3; + unsigned int kind:3; /* Compact is with respect to the allocation scheme. Compact unicode objects only require one memory block while non-compact objects use one block for the PyUnicodeObject struct and another for its data buffer. */ - unsigned short compact:1; + unsigned int compact:1; /* The string only contains characters in the range U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. */ - unsigned short ascii:1; + unsigned int ascii:1; /* The object is statically allocated. */ - unsigned short statically_allocated:1; + unsigned int statically_allocated:1; +#ifndef Py_GIL_DISABLED /* Padding to ensure that PyUnicode_DATA() is always aligned to - 4 bytes (see issue #19537 on m68k) and we use unsigned short to avoid - the extra four bytes on 32-bit Windows. This is restricted features - for specific compilers including GCC, MSVC, Clang and IBM's XL compiler. 
*/ - unsigned short :10; + 4 bytes (see issue gh-63736 on m68k) */ + unsigned int :24; +#endif } state; } PyASCIIObject; @@ -198,7 +208,7 @@ typedef struct { /* Use only if you know it's a string */ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { #ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned); + return _Py_atomic_load_uint8_relaxed(&_PyASCIIObject_CAST(op)->state.interned); #else return _PyASCIIObject_CAST(op)->state.interned; #endif diff --git a/Include/pymacro.h b/Include/pymacro.h index a82f347866e8d0..218987a80b0d91 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -23,6 +23,47 @@ # define static_assert _Static_assert #endif + +// _Py_ALIGN_AS: this compiler's spelling of the `alignas` keyword. +// We currently use alignas for free-threaded builds only; additional compat +// checking would be great before we add it to the default build. +// Standards/compiler support: +// - `alignas` is a keyword in C23 and C++11. +// - `_Alignas` is a keyword in C11 +// - GCC & clang have __attribute__((aligned)) +// (use that for older standards in pedantic mode) +// - MSVC has __declspec(align) +// - `_Alignas` is a common C compiler extension +// Older compilers may name it differently; to allow compilation on such +// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already +// defined. Note that defining it wrong (including defining it to nothing) will +// cause ABI incompatibilities. 
+#ifdef Py_GIL_DISABLED +# ifndef _Py_ALIGN_AS +# ifdef __cplusplus +# if __cplusplus >= 201103L +# define _Py_ALIGN_AS(V) alignas(V) +# elif defined(__GNUC__) || defined(__clang__) +# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) +# elif defined(_MSC_VER) +# define _Py_ALIGN_AS(V) __declspec(align(V)) +# else +# define _Py_ALIGN_AS(V) alignas(V) +# endif +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +# define _Py_ALIGN_AS(V) alignas(V) +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define _Py_ALIGN_AS(V) _Alignas(V) +# elif (defined(__GNUC__) || defined(__clang__)) +# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) +# elif defined(_MSC_VER) +# define _Py_ALIGN_AS(V) __declspec(align(V)) +# else +# define _Py_ALIGN_AS(V) _Alignas(V) +# endif +# endif +#endif + /* Minimum value between x and y */ #define Py_MIN(x, y) (((x) > (y)) ? (y) : (x)) diff --git a/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst b/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst new file mode 100644 index 00000000000000..4b6a6e3606ff16 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-04-28-15-36-01.gh-issue-128972.8bZMIm.rst @@ -0,0 +1,3 @@ +For non-free-threaded builds, the memory layout of :c:struct:`PyASCIIObject` +is reverted to match Python 3.13. (Note that the structure is not part of +stable ABI and so its memory layout is *not* guaranteed to remain stable.) 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 462323a828ef49..eb3e1c48fd4050 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15918,7 +15918,7 @@ immortalize_interned(PyObject *s) _Py_DecRefTotal(_PyThreadState_GET()); } #endif - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); _Py_SetImmortal(s); } @@ -16036,7 +16036,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(s); Py_DECREF(s); } - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ @@ -16172,7 +16172,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) Py_UNREACHABLE(); } if (!shared) { - FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED); + FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED); } } #ifdef INTERNED_STATS _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com