https://github.com/python/cpython/commit/c81fa2b9cd1ed4d4ff5c13984d6582c27e5f2633
commit: c81fa2b9cd1ed4d4ff5c13984d6582c27e5f2633
branch: main
author: Eric Snow <ericsnowcurren...@gmail.com>
committer: ericsnowcurrently <ericsnowcurren...@gmail.com>
date: 2025-05-08T15:07:46Z
summary:

gh-132775: Add _PyCode_GetScriptXIData() (gh-133480)

_PyCode_GetScriptXIData() converts function, code, str, bytes, bytearray, and
memoryview objects to a PyCodeObject and ensures that the result looks like a
script.  That means no args, no return value (other than None), and no closure.
_PyCode_GetPureScriptXIData() takes it a step further and also ensures that the
code uses no globals.

We also add _PyObject_SupportedAsScript() to the internal C-API.

files:
M Include/internal/pycore_crossinterp.h
M Include/internal/pycore_pythonrun.h
M Lib/test/_code_definitions.py
M Lib/test/test_code.py
M Lib/test/test_crossinterp.py
M Modules/_testinternalcapi.c
M Python/crossinterp.c
M Python/pythonrun.c

diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h
index 9de61ef54125d5..9c9b2c2f9c599d 100644
--- a/Include/internal/pycore_crossinterp.h
+++ b/Include/internal/pycore_crossinterp.h
@@ -191,6 +191,14 @@ PyAPI_FUNC(int) _PyCode_GetXIData(
         PyThreadState *,
         PyObject *,
         _PyXIData_t *);
+PyAPI_FUNC(int) _PyCode_GetScriptXIData(
+        PyThreadState *,
+        PyObject *,
+        _PyXIData_t *);
+PyAPI_FUNC(int) _PyCode_GetPureScriptXIData(
+        PyThreadState *,
+        PyObject *,
+        _PyXIData_t *);
 
 
 /* using cross-interpreter data */
diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h
index 0bfc5704dc4c59..7daed1326af8d5 100644
--- a/Include/internal/pycore_pythonrun.h
+++ b/Include/internal/pycore_pythonrun.h
@@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject(
     PyObject *filename,
     PyCompilerFlags *flags);
 
+extern int _PyObject_SupportedAsScript(PyObject *);
 extern const char* _Py_SourceAsString(
     PyObject *cmd,
     const char *funcname,
diff --git a/Lib/test/_code_definitions.py b/Lib/test/_code_definitions.py
index d64ac45d85f396..733a15b25f6894 100644
--- a/Lib/test/_code_definitions.py
+++ b/Lib/test/_code_definitions.py
@@ -1,4 +1,32 @@
 
+def simple_script():
+    assert True
+
+
+def complex_script():
+    obj = 'a string'
+    pickle = __import__('pickle')
+    def spam_minimal():
+        pass
+    spam_minimal()
+    data = pickle.dumps(obj)
+    res = pickle.loads(data)
+    assert res == obj, (res, obj)
+
+
+def script_with_globals():
+    obj1, obj2 = spam(42)
+    assert obj1 == 42
+    assert obj2 is None
+
+
+def script_with_explicit_empty_return():
+    return None
+
+
+def script_with_return():
+    return True
+
 
 def spam_minimal():
     # no arg defaults or kwarg defaults
@@ -141,6 +169,11 @@ def ham_C_closure(z):
 
 TOP_FUNCTIONS = [
     # shallow
+    simple_script,
+    complex_script,
+    script_with_globals,
+    script_with_explicit_empty_return,
+    script_with_return,
     spam_minimal,
     spam_with_builtins,
     spam_with_globals_and_builtins,
@@ -179,6 +212,10 @@ def ham_C_closure(z):
 ]
 
 STATELESS_FUNCTIONS = [
+    simple_script,
+    complex_script,
+    script_with_explicit_empty_return,
+    script_with_return,
     spam,
     spam_minimal,
     spam_with_builtins,
@@ -200,10 +237,26 @@ def ham_C_closure(z):
 ]
 STATELESS_CODE = [
     *STATELESS_FUNCTIONS,
+    script_with_globals,
     spam_with_globals_and_builtins,
     spam_full,
 ]
 
+PURE_SCRIPT_FUNCTIONS = [
+    simple_script,
+    complex_script,
+    script_with_explicit_empty_return,
+    spam_minimal,
+    spam_with_builtins,
+    spam_with_inner_not_closure,
+    spam_with_inner_closure,
+]
+SCRIPT_FUNCTIONS = [
+    *PURE_SCRIPT_FUNCTIONS,
+    script_with_globals,
+    spam_with_globals_and_builtins,
+]
+
 
 # generators
 
diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py
index 6715ee051336a1..32cf8aacaf6b72 100644
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -673,6 +673,20 @@ def test_local_kinds(self):
         VARKWARGS = CO_FAST_LOCAL | CO_FAST_ARG_VAR | CO_FAST_ARG_KW
 
         funcs = {
+            defs.simple_script: {},
+            defs.complex_script: {
+                'obj': CO_FAST_LOCAL,
+                'pickle': CO_FAST_LOCAL,
+                'spam_minimal': CO_FAST_LOCAL,
+                'data': CO_FAST_LOCAL,
+                'res': CO_FAST_LOCAL,
+            },
+            defs.script_with_globals: {
+                'obj1': CO_FAST_LOCAL,
+                'obj2': CO_FAST_LOCAL,
+            },
+            defs.script_with_explicit_empty_return: {},
+            defs.script_with_return: {},
             defs.spam_minimal: {},
             defs.spam_with_builtins: {
                 'x': CO_FAST_LOCAL,
@@ -898,6 +912,19 @@ def new_var_counts(*,
             }
 
         funcs = {
+            defs.simple_script: new_var_counts(),
+            defs.complex_script: new_var_counts(
+                purelocals=5,
+                globalvars=1,
+                attrs=2,
+            ),
+            defs.script_with_globals: new_var_counts(
+                purelocals=2,
+                globalvars=1,
+            ),
+            defs.script_with_explicit_empty_return: new_var_counts(),
+            defs.script_with_return: new_var_counts(),
+            defs.spam_minimal: new_var_counts(),
             defs.spam_minimal: new_var_counts(),
             defs.spam_with_builtins: new_var_counts(
                 purelocals=4,
diff --git a/Lib/test/test_crossinterp.py b/Lib/test/test_crossinterp.py
index 5ac0080db435a8..b366a29645e9f2 100644
--- a/Lib/test/test_crossinterp.py
+++ b/Lib/test/test_crossinterp.py
@@ -758,6 +758,126 @@ def test_other_objects(self):
         ])
 
 
+class PureShareableScriptTests(_GetXIDataTests):
+
+    MODE = 'script-pure'
+
+    VALID_SCRIPTS = [
+        '',
+        'spam',
+        '# a comment',
+        'print("spam")',
+        'raise Exception("spam")',
+        """if True:
+            do_something()
+            """,
+        """if True:
+            def spam(x):
+                return x
+            class Spam:
+                def eggs(self):
+                    return 42
+            x = Spam().eggs()
+            raise ValueError(spam(x))
+            """,
+    ]
+    INVALID_SCRIPTS = [
+        '    pass',  # IndentationError
+        '----',  # SyntaxError
+        """if True:
+            def spam():
+                # no body
+            spam()
+            """,  # IndentationError
+    ]
+
+    def test_valid_str(self):
+        self.assert_roundtrip_not_equal([
+            *self.VALID_SCRIPTS,
+        ], expecttype=types.CodeType)
+
+    def test_invalid_str(self):
+        self.assert_not_shareable([
+            *self.INVALID_SCRIPTS,
+        ])
+
+    def test_valid_bytes(self):
+        self.assert_roundtrip_not_equal([
+            *(s.encode('utf8') for s in self.VALID_SCRIPTS),
+        ], expecttype=types.CodeType)
+
+    def test_invalid_bytes(self):
+        self.assert_not_shareable([
+            *(s.encode('utf8') for s in self.INVALID_SCRIPTS),
+        ])
+
+    def test_pure_script_code(self):
+        self.assert_roundtrip_equal_not_identical([
+            *(f.__code__ for f in defs.PURE_SCRIPT_FUNCTIONS),
+        ])
+
+    def test_impure_script_code(self):
+        self.assert_not_shareable([
+            *(f.__code__ for f in defs.SCRIPT_FUNCTIONS
+              if f not in defs.PURE_SCRIPT_FUNCTIONS),
+        ])
+
+    def test_other_code(self):
+        self.assert_not_shareable([
+            *(f.__code__ for f in defs.FUNCTIONS
+              if f not in defs.SCRIPT_FUNCTIONS),
+            *(f.__code__ for f in defs.FUNCTION_LIKE),
+        ])
+
+    def test_pure_script_function(self):
+        self.assert_roundtrip_not_equal([
+            *defs.PURE_SCRIPT_FUNCTIONS,
+        ], expecttype=types.CodeType)
+
+    def test_impure_script_function(self):
+        self.assert_not_shareable([
+            *(f for f in defs.SCRIPT_FUNCTIONS
+              if f not in defs.PURE_SCRIPT_FUNCTIONS),
+        ])
+
+    def test_other_function(self):
+        self.assert_not_shareable([
+            *(f for f in defs.FUNCTIONS
+              if f not in defs.SCRIPT_FUNCTIONS),
+            *defs.FUNCTION_LIKE,
+        ])
+
+    def test_other_objects(self):
+        self.assert_not_shareable([
+            None,
+            True,
+            False,
+            Ellipsis,
+            NotImplemented,
+            (),
+            [],
+            {},
+            object(),
+        ])
+
+
+class ShareableScriptTests(PureShareableScriptTests):
+
+    MODE = 'script'
+
+    def test_impure_script_code(self):
+        self.assert_roundtrip_equal_not_identical([
+            *(f.__code__ for f in defs.SCRIPT_FUNCTIONS
+              if f not in defs.PURE_SCRIPT_FUNCTIONS),
+        ])
+
+    def test_impure_script_function(self):
+        self.assert_roundtrip_not_equal([
+            *(f for f in defs.SCRIPT_FUNCTIONS
+              if f not in defs.PURE_SCRIPT_FUNCTIONS),
+        ], expecttype=types.CodeType)
+
+
 class ShareableTypeTests(_GetXIDataTests):
 
     MODE = 'xidata'
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 63f1d079d8d312..3030f45d72cefa 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1989,6 +1989,16 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs)
             goto error;
         }
     }
+    else if (strcmp(mode, "script") == 0) {
+        if (_PyCode_GetScriptXIData(tstate, obj, xidata) != 0) {
+            goto error;
+        }
+    }
+    else if (strcmp(mode, "script-pure") == 0) {
+        if (_PyCode_GetPureScriptXIData(tstate, obj, xidata) != 0) {
+            goto error;
+        }
+    }
     else {
         PyErr_Format(PyExc_ValueError, "unsupported mode %R", modeobj);
         goto error;
diff --git a/Python/crossinterp.c b/Python/crossinterp.c
index 74ce02f1a26401..7d7e6551c3f2d2 100644
--- a/Python/crossinterp.c
+++ b/Python/crossinterp.c
@@ -6,8 +6,10 @@
 #include "osdefs.h"               // MAXPATHLEN
 #include "pycore_ceval.h"         // _Py_simple_func
 #include "pycore_crossinterp.h"   // _PyXIData_t
+#include "pycore_function.h"      // _PyFunction_VerifyStateless()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
 #include "pycore_namespace.h"     // _PyNamespace_New()
+#include "pycore_pythonrun.h"     // _Py_SourceAsString()
 #include "pycore_typeobject.h"    // _PyStaticType_InitBuiltin()
 
 
@@ -784,6 +786,131 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata)
 }
 
 
+/* script wrapper */
+
+static int
+verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure)
+{
+    // Make sure it isn't a closure and (optionally) doesn't use globals.
+    PyObject *builtins = NULL;
+    if (pure) {
+        builtins = _PyEval_GetBuiltins(tstate);
+        assert(builtins != NULL);
+    }
+    if (checked) {
+        assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0);
+    }
+    else if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) {
+        return -1;
+    }
+    // Make sure it doesn't have args.
+    if (co->co_argcount > 0
+        || co->co_posonlyargcount > 0
+        || co->co_kwonlyargcount > 0
+        || co->co_flags & (CO_VARARGS | CO_VARKEYWORDS))
+    {
+        _PyErr_SetString(tstate, PyExc_ValueError,
+                         "code with args not supported");
+        return -1;
+    }
+    // Make sure it doesn't return anything.
+    if (!_PyCode_ReturnsOnlyNone(co)) {
+        _PyErr_SetString(tstate, PyExc_ValueError,
+                         "code that returns a value is not a script");
+        return -1;
+    }
+    return 0;
+}
+
+static int
+get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure,
+                  _PyXIData_t *xidata)
+{
+    // Get the corresponding code object.
+    PyObject *code = NULL;
+    int checked = 0;
+    if (PyCode_Check(obj)) {
+        code = obj;
+        Py_INCREF(code);
+    }
+    else if (PyFunction_Check(obj)) {
+        code = PyFunction_GET_CODE(obj);
+        assert(code != NULL);
+        Py_INCREF(code);
+        if (pure) {
+            if (_PyFunction_VerifyStateless(tstate, obj) < 0) {
+                goto error;
+            }
+            checked = 1;
+        }
+    }
+    else {
+        const char *filename = "<script>";
+        int optimize = 0;
+        PyCompilerFlags cf = _PyCompilerFlags_INIT;
+        cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+        PyObject *ref = NULL;
+        const char *script = _Py_SourceAsString(obj, "???", "???", &cf, &ref);
+        if (script == NULL) {
+            if (!_PyObject_SupportedAsScript(obj)) {
+                // We discard the raised exception.
+                _PyErr_Format(tstate, PyExc_TypeError,
+                              "unsupported script %R", obj);
+            }
+            goto error;
+        }
+        code = Py_CompileStringExFlags(
+                    script, filename, Py_file_input, &cf, optimize);
+        Py_XDECREF(ref);
+        if (code == NULL) {
+            goto error;
+        }
+        // Compiled text can't have args or any return statements,
+        // nor be a closure.  It can use globals though.
+        if (!pure) {
+            // We don't need to check for globals either.
+            checked = 1;
+        }
+    }
+
+    // Make sure it's actually a script.
+    if (verify_script(tstate, (PyCodeObject *)code, checked, pure) < 0) {
+        goto error;
+    }
+
+    // Convert the code object.
+    int res = _PyCode_GetXIData(tstate, code, xidata);
+    Py_DECREF(code);
+    if (res < 0) {
+        return -1;
+    }
+    return 0;
+
+error:
+    Py_XDECREF(code);
+    PyObject *cause = _PyErr_GetRaisedException(tstate);
+    assert(cause != NULL);
+    _set_xid_lookup_failure(
+                tstate, NULL, "object not a valid script", cause);
+    Py_DECREF(cause);
+    return -1;
+}
+
+int
+_PyCode_GetScriptXIData(PyThreadState *tstate,
+                        PyObject *obj, _PyXIData_t *xidata)
+{
+    return get_script_xidata(tstate, obj, 0, xidata);
+}
+
+int
+_PyCode_GetPureScriptXIData(PyThreadState *tstate,
+                            PyObject *obj, _PyXIData_t *xidata)
+{
+    return get_script_xidata(tstate, obj, 1, xidata);
+}
+
+
 /* using cross-interpreter data */
 
 PyObject *
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 4ee287af72fdb2..f67b72aa91f671 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1524,6 +1524,26 @@ Py_CompileStringExFlags(const char *str, const char *filename_str, int start,
     return co;
 }
 
+int
+_PyObject_SupportedAsScript(PyObject *cmd)
+{
+    if (PyUnicode_Check(cmd)) {
+        return 1;
+    }
+    else if (PyBytes_Check(cmd)) {
+        return 1;
+    }
+    else if (PyByteArray_Check(cmd)) {
+        return 1;
+    }
+    else if (PyObject_CheckBuffer(cmd)) {
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+
 const char *
 _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
 {

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to