https://github.com/python/cpython/commit/c81fa2b9cd1ed4d4ff5c13984d6582c27e5f2633
commit: c81fa2b9cd1ed4d4ff5c13984d6582c27e5f2633
branch: main
author: Eric Snow <[email protected]>
committer: ericsnowcurrently <[email protected]>
date: 2025-05-08T15:07:46Z
summary:
gh-132775: Add _PyCode_GetScriptXIData() (gh-133480)
This converts functions, code, str, bytes, bytearray, and memoryview objects to
PyCodeObject, and ensures that the object looks like a script. That means no
args, no return value, and no closure.
_PyCode_GetPureScriptXIData() takes it a step further and ensures there are no
globals.
We also add _PyObject_SupportedAsScript() to the internal C-API.
files:
M Include/internal/pycore_crossinterp.h
M Include/internal/pycore_pythonrun.h
M Lib/test/_code_definitions.py
M Lib/test/test_code.py
M Lib/test/test_crossinterp.py
M Modules/_testinternalcapi.c
M Python/crossinterp.c
M Python/pythonrun.c
diff --git a/Include/internal/pycore_crossinterp.h
b/Include/internal/pycore_crossinterp.h
index 9de61ef54125d5..9c9b2c2f9c599d 100644
--- a/Include/internal/pycore_crossinterp.h
+++ b/Include/internal/pycore_crossinterp.h
@@ -191,6 +191,14 @@ PyAPI_FUNC(int) _PyCode_GetXIData(
PyThreadState *,
PyObject *,
_PyXIData_t *);
+// Like _PyCode_GetXIData(), but the object is first converted to a code
+// object and must look like a script (no args, no return value, no closure).
+PyAPI_FUNC(int) _PyCode_GetScriptXIData(
+ PyThreadState *,
+ PyObject *,
+ _PyXIData_t *);
+// Stricter variant of the script conversion: the code must also not use
+// globals.
+PyAPI_FUNC(int) _PyCode_GetPureScriptXIData(
+ PyThreadState *,
+ PyObject *,
+ _PyXIData_t *);
/* using cross-interpreter data */
diff --git a/Include/internal/pycore_pythonrun.h
b/Include/internal/pycore_pythonrun.h
index 0bfc5704dc4c59..7daed1326af8d5 100644
--- a/Include/internal/pycore_pythonrun.h
+++ b/Include/internal/pycore_pythonrun.h
@@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject(
PyObject *filename,
PyCompilerFlags *flags);
+// Returns 1 if the object is a str, bytes, bytearray, or any object
+// supporting the buffer protocol; otherwise returns 0.
+extern int _PyObject_SupportedAsScript(PyObject *);
extern const char* _Py_SourceAsString(
PyObject *cmd,
const char *funcname,
diff --git a/Lib/test/_code_definitions.py b/Lib/test/_code_definitions.py
index d64ac45d85f396..733a15b25f6894 100644
--- a/Lib/test/_code_definitions.py
+++ b/Lib/test/_code_definitions.py
@@ -1,4 +1,32 @@
+# Minimal "script" function: no parameters, no locals, no explicit return.
+def simple_script():
+ assert True
+
+
+# Script-like function that works only with locals: "pickle" is bound
+# locally via __import__() and the inner spam_minimal() is a local that
+# shadows the module-level function of the same name.
+def complex_script():
+ obj = 'a string'
+ pickle = __import__('pickle')
+ def spam_minimal():
+ pass
+ spam_minimal()
+ data = pickle.dumps(obj)
+ res = pickle.loads(data)
+ assert res == obj, (res, obj)
+
+
+# Script-like function that is not "pure": it calls spam(), which is not
+# defined locally and so is looked up as a global.
+def script_with_globals():
+ obj1, obj2 = spam(42)
+ assert obj1 == 42
+ assert obj2 is None
+
+
+# An explicit "return None" still counts as a script; this function is
+# listed in PURE_SCRIPT_FUNCTIONS.
+def script_with_explicit_empty_return():
+ return None
+
+
+# Returns a value other than None, so it is deliberately left out of
+# SCRIPT_FUNCTIONS and PURE_SCRIPT_FUNCTIONS.
+def script_with_return():
+ return True
+
def spam_minimal():
# no arg defaults or kwarg defaults
@@ -141,6 +169,11 @@ def ham_C_closure(z):
TOP_FUNCTIONS = [
# shallow
+ simple_script,
+ complex_script,
+ script_with_globals,
+ script_with_explicit_empty_return,
+ script_with_return,
spam_minimal,
spam_with_builtins,
spam_with_globals_and_builtins,
@@ -179,6 +212,10 @@ def ham_C_closure(z):
]
STATELESS_FUNCTIONS = [
+ simple_script,
+ complex_script,
+ script_with_explicit_empty_return,
+ script_with_return,
spam,
spam_minimal,
spam_with_builtins,
@@ -200,10 +237,26 @@ def ham_C_closure(z):
]
STATELESS_CODE = [
*STATELESS_FUNCTIONS,
+ script_with_globals,
spam_with_globals_and_builtins,
spam_full,
]
+# Functions the tests expect to be accepted as "pure" scripts
+# (the 'script-pure' mode in test_crossinterp).
+PURE_SCRIPT_FUNCTIONS = [
+ simple_script,
+ complex_script,
+ script_with_explicit_empty_return,
+ spam_minimal,
+ spam_with_builtins,
+ spam_with_inner_not_closure,
+ spam_with_inner_closure,
+]
+# Functions accepted in the plain 'script' mode: every pure script plus the
+# ones that (judging by their names) also use globals.
+SCRIPT_FUNCTIONS = [
+ *PURE_SCRIPT_FUNCTIONS,
+ script_with_globals,
+ spam_with_globals_and_builtins,
+]
+
# generators
diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py
index 6715ee051336a1..32cf8aacaf6b72 100644
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -673,6 +673,20 @@ def test_local_kinds(self):
VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
funcs = {
+ defs.simple_script: {},
+ defs.complex_script: {
+ 'obj': CO_FAST_LOCAL,
+ 'pickle': CO_FAST_LOCAL,
+ 'spam_minimal': CO_FAST_LOCAL,
+ 'data': CO_FAST_LOCAL,
+ 'res': CO_FAST_LOCAL,
+ },
+ defs.script_with_globals: {
+ 'obj1': CO_FAST_LOCAL,
+ 'obj2': CO_FAST_LOCAL,
+ },
+ defs.script_with_explicit_empty_return: {},
+ defs.script_with_return: {},
defs.spam_minimal: {},
defs.spam_with_builtins: {
'x': CO_FAST_LOCAL,
@@ -898,6 +912,19 @@ def new_var_counts(*,
}
funcs = {
+ defs.simple_script: new_var_counts(),
+ defs.complex_script: new_var_counts(
+ purelocals=5,
+ globalvars=1,
+ attrs=2,
+ ),
+ defs.script_with_globals: new_var_counts(
+ purelocals=2,
+ globalvars=1,
+ ),
+ defs.script_with_explicit_empty_return: new_var_counts(),
+ defs.script_with_return: new_var_counts(),
+ defs.spam_minimal: new_var_counts(),
defs.spam_minimal: new_var_counts(),
defs.spam_with_builtins: new_var_counts(
purelocals=4,
diff --git a/Lib/test/test_crossinterp.py b/Lib/test/test_crossinterp.py
index 5ac0080db435a8..b366a29645e9f2 100644
--- a/Lib/test/test_crossinterp.py
+++ b/Lib/test/test_crossinterp.py
@@ -758,6 +758,126 @@ def test_other_objects(self):
])
+# Tests for the 'script-pure' mode, which _testinternalcapi maps to
+# _PyCode_GetPureScriptXIData(). The base class is not shown here;
+# presumably it passes MODE through to get_crossinterp_data() -- confirm.
+class PureShareableScriptTests(_GetXIDataTests):
+
+ MODE = 'script-pure'
+
+ # Source text that compiles cleanly. The conversion itself only compiles
+ # the source, so undefined names and raise statements are acceptable.
+ VALID_SCRIPTS = [
+ '',
+ 'spam',
+ '# a comment',
+ 'print("spam")',
+ 'raise Exception("spam")',
+ """if True:
+ do_something()
+ """,
+ """if True:
+ def spam(x):
+ return x
+ class Spam:
+ def eggs(self):
+ return 42
+ x = Spam().eggs()
+ raise ValueError(spam(x))
+ """,
+ ]
+ # Source text that fails to compile (see the per-item notes).
+ INVALID_SCRIPTS = [
+ ' pass', # IndentationError
+ '----', # SyntaxError
+ """if True:
+ def spam():
+ # no body
+ spam()
+ """, # IndentationError
+ ]
+
+ # str input: the result is expected to be a code object, not the text.
+ def test_valid_str(self):
+ self.assert_roundtrip_not_equal([
+ *self.VALID_SCRIPTS,
+ ], expecttype=types.CodeType)
+
+ def test_invalid_str(self):
+ self.assert_not_shareable([
+ *self.INVALID_SCRIPTS,
+ ])
+
+ # bytes input: the same scripts, UTF-8 encoded.
+ def test_valid_bytes(self):
+ self.assert_roundtrip_not_equal([
+ *(s.encode('utf8') for s in self.VALID_SCRIPTS),
+ ], expecttype=types.CodeType)
+
+ def test_invalid_bytes(self):
+ self.assert_not_shareable([
+ *(s.encode('utf8') for s in self.INVALID_SCRIPTS),
+ ])
+
+ # Code objects taken from the pure script functions.
+ def test_pure_script_code(self):
+ self.assert_roundtrip_equal_not_identical([
+ *(f.__code__ for f in defs.PURE_SCRIPT_FUNCTIONS),
+ ])
+
+ # Scripts that are not pure must be rejected in this mode.
+ def test_impure_script_code(self):
+ self.assert_not_shareable([
+ *(f.__code__ for f in defs.SCRIPT_FUNCTIONS
+ if f not in defs.PURE_SCRIPT_FUNCTIONS),
+ ])
+
+ # Code that is not a script at all must be rejected.
+ def test_other_code(self):
+ self.assert_not_shareable([
+ *(f.__code__ for f in defs.FUNCTIONS
+ if f not in defs.SCRIPT_FUNCTIONS),
+ *(f.__code__ for f in defs.FUNCTION_LIKE),
+ ])
+
+ # Function objects: the result is expected to be a code object.
+ def test_pure_script_function(self):
+ self.assert_roundtrip_not_equal([
+ *defs.PURE_SCRIPT_FUNCTIONS,
+ ], expecttype=types.CodeType)
+
+ def test_impure_script_function(self):
+ self.assert_not_shareable([
+ *(f for f in defs.SCRIPT_FUNCTIONS
+ if f not in defs.PURE_SCRIPT_FUNCTIONS),
+ ])
+
+ def test_other_function(self):
+ self.assert_not_shareable([
+ *(f for f in defs.FUNCTIONS
+ if f not in defs.SCRIPT_FUNCTIONS),
+ *defs.FUNCTION_LIKE,
+ ])
+
+ # Objects that are neither code, functions, nor source text.
+ def test_other_objects(self):
+ self.assert_not_shareable([
+ None,
+ True,
+ False,
+ Ellipsis,
+ NotImplemented,
+ (),
+ [],
+ {},
+ object(),
+ ])
+
+
+# Tests for the 'script' mode, which _testinternalcapi maps to
+# _PyCode_GetScriptXIData(). All tests are inherited from the pure-script
+# class; only the two "impure" tests are overridden, because scripts that
+# are not pure are expected to be shareable in this mode.
+class ShareableScriptTests(PureShareableScriptTests):
+
+ MODE = 'script'
+
+ def test_impure_script_code(self):
+ self.assert_roundtrip_equal_not_identical([
+ *(f.__code__ for f in defs.SCRIPT_FUNCTIONS
+ if f not in defs.PURE_SCRIPT_FUNCTIONS),
+ ])
+
+ def test_impure_script_function(self):
+ self.assert_roundtrip_not_equal([
+ *(f for f in defs.SCRIPT_FUNCTIONS
+ if f not in defs.PURE_SCRIPT_FUNCTIONS),
+ ], expecttype=types.CodeType)
+
+
class ShareableTypeTests(_GetXIDataTests):
MODE = 'xidata'
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 63f1d079d8d312..3030f45d72cefa 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1989,6 +1989,16 @@ get_crossinterp_data(PyObject *self, PyObject *args,
PyObject *kwargs)
goto error;
}
}
+ else if (strcmp(mode, "script") == 0) {
+ if (_PyCode_GetScriptXIData(tstate, obj, xidata) != 0) {
+ goto error;
+ }
+ }
+ else if (strcmp(mode, "script-pure") == 0) {
+ if (_PyCode_GetPureScriptXIData(tstate, obj, xidata) != 0) {
+ goto error;
+ }
+ }
else {
PyErr_Format(PyExc_ValueError, "unsupported mode %R", modeobj);
goto error;
diff --git a/Python/crossinterp.c b/Python/crossinterp.c
index 74ce02f1a26401..7d7e6551c3f2d2 100644
--- a/Python/crossinterp.c
+++ b/Python/crossinterp.c
@@ -6,8 +6,10 @@
#include "osdefs.h" // MAXPATHLEN
#include "pycore_ceval.h" // _Py_simple_func
#include "pycore_crossinterp.h" // _PyXIData_t
+#include "pycore_function.h" // _PyFunction_VerifyStateless()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_namespace.h" // _PyNamespace_New()
+#include "pycore_pythonrun.h" // _Py_SourceAsString()
#include "pycore_typeobject.h" // _PyStaticType_InitBuiltin()
@@ -784,6 +786,131 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject
*obj, _PyXIData_t *xidata)
}
+/* script wrapper */
+
+// Check that the code object "co" can be treated as a script.
+//
+// checked: nonzero if the caller already established that the code is
+// stateless; the check is then only repeated inside assert(), so it is
+// skipped entirely when assertions are compiled out.
+// pure: nonzero to pass the builtins to _PyCode_VerifyStateless(), which
+// is how the "doesn't use globals" restriction is requested.
+//
+// Returns 0 on success and -1 on failure. The args and return-value
+// failures set ValueError here; a failed stateless check is presumably
+// reported by _PyCode_VerifyStateless() itself (not visible here).
+static int
+verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure)
+{
+ // Make sure it isn't a closure and (optionally) doesn't use globals.
+ PyObject *builtins = NULL;
+ if (pure) {
+ builtins = _PyEval_GetBuiltins(tstate);
+ assert(builtins != NULL);
+ }
+ if (checked) {
+ assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0);
+ }
+ else if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) {
+ return -1;
+ }
+ // Make sure it doesn't have args.
+ if (co->co_argcount > 0
+ || co->co_posonlyargcount > 0
+ || co->co_kwonlyargcount > 0
+ || co->co_flags & (CO_VARARGS | CO_VARKEYWORDS))
+ {
+ _PyErr_SetString(tstate, PyExc_ValueError,
+ "code with args not supported");
+ return -1;
+ }
+ // Make sure it doesn't return anything.
+ if (!_PyCode_ReturnsOnlyNone(co)) {
+ _PyErr_SetString(tstate, PyExc_ValueError,
+ "code that returns a value is not a script");
+ return -1;
+ }
+ return 0;
+}
+
+// Convert "obj" to a code object, verify that it is a script, and fill in
+// "xidata" from that code object via _PyCode_GetXIData().
+//
+// obj may be a code object, a function (its code object is used), or
+// anything _Py_SourceAsString() accepts as source text, which is compiled
+// with Py_file_input under the filename "<script>".
+// pure: nonzero to additionally reject code that uses globals.
+//
+// Returns 0 on success and -1 on failure. Failures before the final
+// conversion go through _set_xid_lookup_failure() with the message
+// "object not a valid script" and the original exception as the cause;
+// a failure in _PyCode_GetXIData() itself is returned as-is.
+static int
+get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure,
+ _PyXIData_t *xidata)
+{
+ // Get the corresponding code object.
+ PyObject *code = NULL;
+ int checked = 0;
+ if (PyCode_Check(obj)) {
+ code = obj;
+ Py_INCREF(code);
+ }
+ else if (PyFunction_Check(obj)) {
+ code = PyFunction_GET_CODE(obj);
+ assert(code != NULL);
+ Py_INCREF(code);
+ if (pure) {
+ // The function-level check stands in for the code-level one,
+ // so verify_script() only needs to assert it.
+ if (_PyFunction_VerifyStateless(tstate, obj) < 0) {
+ goto error;
+ }
+ checked = 1;
+ }
+ }
+ else {
+ const char *filename = "<script>";
+ int optimize = 0;
+ PyCompilerFlags cf = _PyCompilerFlags_INIT;
+ cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+ PyObject *ref = NULL;
+ // NOTE(review): "???" is passed for both the funcname and what
+ // arguments; presumably they only show up in error text -- confirm.
+ const char *script = _Py_SourceAsString(obj, "???", "???", &cf, &ref);
+ if (script == NULL) {
+ if (!_PyObject_SupportedAsScript(obj)) {
+ // We discard the raised exception.
+ _PyErr_Format(tstate, PyExc_TypeError,
+ "unsupported script %R", obj);
+ }
+ // NOTE(review): "ref" is not released on this path; this assumes
+ // _Py_SourceAsString() leaves it NULL on failure -- confirm.
+ goto error;
+ }
+ code = Py_CompileStringExFlags(
+ script, filename, Py_file_input, &cf, optimize);
+ Py_XDECREF(ref);
+ if (code == NULL) {
+ goto error;
+ }
+ // Compiled text can't have args or any return statements,
+ // nor be a closure. It can use globals though.
+ if (!pure) {
+ // We don't need to check for globals either.
+ checked = 1;
+ }
+ }
+
+ // Make sure it's actually a script.
+ if (verify_script(tstate, (PyCodeObject *)code, checked, pure) < 0) {
+ goto error;
+ }
+
+ // Convert the code object.
+ int res = _PyCode_GetXIData(tstate, code, xidata);
+ Py_DECREF(code);
+ if (res < 0) {
+ return -1;
+ }
+ return 0;
+
+error:
+ Py_XDECREF(code);
+ // Re-raise as a lookup failure, keeping the original exception as cause.
+ PyObject *cause = _PyErr_GetRaisedException(tstate);
+ assert(cause != NULL);
+ _set_xid_lookup_failure(
+ tstate, NULL, "object not a valid script", cause);
+ Py_DECREF(cause);
+ return -1;
+}
+
+// Script conversion without the "pure" restriction (pure=0).
+int
+_PyCode_GetScriptXIData(PyThreadState *tstate,
+ PyObject *obj, _PyXIData_t *xidata)
+{
+ return get_script_xidata(tstate, obj, 0, xidata);
+}
+
+// Script conversion with the "pure" restriction enabled (pure=1).
+int
+_PyCode_GetPureScriptXIData(PyThreadState *tstate,
+ PyObject *obj, _PyXIData_t *xidata)
+{
+ return get_script_xidata(tstate, obj, 1, xidata);
+}
+
+
/* using cross-interpreter data */
PyObject *
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 4ee287af72fdb2..f67b72aa91f671 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1524,6 +1524,26 @@ Py_CompileStringExFlags(const char *str, const char
*filename_str, int start,
return co;
}
+// Report whether "cmd" has a type that can hold script source text.
+// Returns 1 for str, bytes, bytearray, and any object that supports the
+// buffer protocol; returns 0 for everything else. Only the type is
+// checked, not the content.
+int
+_PyObject_SupportedAsScript(PyObject *cmd)
+{
+ if (PyUnicode_Check(cmd)) {
+ return 1;
+ }
+ else if (PyBytes_Check(cmd)) {
+ return 1;
+ }
+ else if (PyByteArray_Check(cmd)) {
+ return 1;
+ }
+ else if (PyObject_CheckBuffer(cmd)) {
+ return 1;
+ }
+ else {
+ return 0;
+ }
+}
+
const char *
_Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what,
PyCompilerFlags *cf, PyObject **cmd_copy)
{
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]