https://github.com/python/cpython/commit/d118bc061b4f7ea916bb25f19c5c7f8e1923fbc7
commit: d118bc061b4f7ea916bb25f19c5c7f8e1923fbc7
branch: 3.14
author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com>
committer: ambv <luk...@langa.pl>
date: 2025-07-22T11:51:02+02:00
summary:

[3.14] gh-124621: Emscripten: Add support for async input devices (GH-136822) 
(GH-136935)

This is useful for implementing a proper `input()`. It requires the
JavaScript engine to support the WebAssembly JSPI (JavaScript Promise
Integration) spec, which is now at stage 4. JSPI is supported in Chrome
since version 137, and in Firefox and Node.js behind a flag.

We override the `__wasi_fd_read()` syscall with our own variant that
checks whether the stream has a `readAsync` operation. If it does, we
use our own async variant of `fd_read()`; otherwise we fall back to the
original `fd_read()`. We also add a variant of `FS.createDevice()`
called `FS.createAsyncInputDevice()`.

Finally, if JSPI is available, we wrap the `main()` symbol with
`WebAssembly.promising()` so that we can stack switch from `fd_read()`.
If JSPI is not available, attempting to read from an AsyncInputDevice
will raise an `OSError`.
(cherry picked from commit 7ae4749d064bd49b0dd96172fee20c1f1678d9e9)

Co-authored-by: Hood Chatham <roberthoodchat...@gmail.com>

files:
A Lib/test/test_capi/test_emscripten.py
M Modules/_testinternalcapi.c
M Python/emscripten_syscalls.c
M Tools/wasm/emscripten/__main__.py

diff --git a/Lib/test/test_capi/test_emscripten.py 
b/Lib/test/test_capi/test_emscripten.py
new file mode 100644
index 00000000000000..272d9a10ceb950
--- /dev/null
+++ b/Lib/test/test_capi/test_emscripten.py
@@ -0,0 +1,25 @@
+import unittest
+from test.support import is_emscripten
+
+if not is_emscripten:
+    raise unittest.SkipTest("Emscripten-only test")
+
+from _testinternalcapi import emscripten_set_up_async_input_device
+from pathlib import Path
+
+
+class EmscriptenAsyncInputDeviceTest(unittest.TestCase):
+    def test_emscripten_async_input_device(self):
+        jspi_supported = emscripten_set_up_async_input_device()
+        p = Path("/dev/blah")
+        self.addCleanup(p.unlink)
+        if not jspi_supported:
+            with open(p, "r") as f:
+                self.assertRaises(OSError, f.readline)
+            return
+
+        with open(p, "r") as f:
+            for _ in range(10):
+                self.assertEqual(f.readline().strip(), "ab")
+                self.assertEqual(f.readline().strip(), "fi")
+                self.assertEqual(f.readline().strip(), "xy")
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 8027f0015c7409..f84cf1a4263a2d 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2346,6 +2346,37 @@ incref_decref_delayed(PyObject *self, PyObject *op)
     Py_RETURN_NONE;
 }
 
+#ifdef __EMSCRIPTEN__
+#include "emscripten.h"
+
+EM_JS(int, emscripten_set_up_async_input_device_js, (void), {
+    let idx = 0;
+    const encoder = new TextEncoder();
+    const bufs = [
+        encoder.encode("ab\n"),
+        encoder.encode("fi\n"),
+        encoder.encode("xy\n"),
+    ];
+    function sleep(t) {
+        return new Promise(res => setTimeout(res, t));
+    }
+    FS.createAsyncInputDevice("/dev", "blah", async () => {
+        await sleep(5);
+        return bufs[(idx ++) % 3];
+    });
+    return !!WebAssembly.promising;
+});
+
+static PyObject *
+emscripten_set_up_async_input_device(PyObject *self, PyObject 
*Py_UNUSED(ignored)) {
+    if (emscripten_set_up_async_input_device_js()) {
+        Py_RETURN_TRUE;
+    } else {
+        Py_RETURN_FALSE;
+    }
+}
+#endif
+
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -2448,6 +2479,9 @@ static PyMethodDef module_functions[] = {
     {"is_static_immortal", is_static_immortal, METH_O},
     {"incref_decref_delayed", incref_decref_delayed, METH_O},
     GET_NEXT_DICT_KEYS_VERSION_METHODDEF
+#ifdef __EMSCRIPTEN__
+    {"emscripten_set_up_async_input_device", 
emscripten_set_up_async_input_device, METH_NOARGS},
+#endif
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c
index bb80f979420ec1..886262acbc6810 100644
--- a/Python/emscripten_syscalls.c
+++ b/Python/emscripten_syscalls.c
@@ -37,3 +37,185 @@ EM_JS(int, __syscall_umask_js, (int mask), {
 int __syscall_umask(int mask) {
     return __syscall_umask_js(mask);
 }
+
+#include <wasi/api.h>
+#include <errno.h>
+#undef errno
+
+// Variant of EM_JS that does C preprocessor substitution on the body
+#define EM_JS_MACROS(ret, func_name, args, body...)                            
\
+  EM_JS(ret, func_name, args, body)
+
+EM_JS_MACROS(void, _emscripten_promising_main_js, (void), {
+    // Define FS.createAsyncInputDevice(). This is quite similar to
+    // FS.createDevice() defined here:
+    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642
+    // but instead of returning one byte at a time, the input() function should
+    // return a Uint8Array. This makes the handler code simpler, the
+    // `createAsyncInputDevice` simpler, and everything faster.
+    FS.createAsyncInputDevice = function(parent, name, input) {
+        parent = typeof parent == 'string' ? parent : FS.getPath(parent);
+        var path = PATH.join2(parent, name);
+        var mode = FS_getMode(true, false);
+        FS.createDevice.major ||= 64;
+        var dev = FS.makedev(FS.createDevice.major++, 0);
+        async function getDataBuf() {
+            var buf;
+            try {
+                buf = await input();
+            } catch (e) {
+                throw new FS.ErrnoError(EIO);
+            }
+            if (!buf?.byteLength) {
+                throw new FS.ErrnoError(EAGAIN);
+            }
+            ops._dataBuf = buf;
+        }
+
+        var ops = {
+            _dataBuf: new Uint8Array(0),
+            open(stream) {
+                stream.seekable = false;
+            },
+            async readAsync(stream, buffer, offset, length, pos /* ignored */) 
{
+                buffer = buffer.subarray(offset, offset + length);
+                if (!ops._dataBuf.byteLength) {
+                    await getDataBuf();
+                }
+                var toRead = Math.min(ops._dataBuf.byteLength, 
buffer.byteLength);
+                buffer.subarray(0, toRead).set(ops._dataBuf);
+                buffer = buffer.subarray(toRead);
+                ops._dataBuf = ops._dataBuf.subarray(toRead);
+                if (toRead) {
+                    stream.node.atime = Date.now();
+                }
+                return toRead;
+            },
+        };
+        FS.registerDevice(dev, ops);
+        return FS.mkdev(path, mode, dev);
+    };
+    if (!WebAssembly.promising) {
+        // No stack switching support =(
+        return;
+    }
+    const origResolveGlobalSymbol = resolveGlobalSymbol;
+    if (!Module.onExit && process?.exit) {
+        Module.onExit = (code) => process.exit(code);
+    }
+    // * wrap the main symbol with WebAssembly.promising,
+    // * call exit_with_live_runtime() to prevent emscripten from shutting down
+    //   the runtime before the promise resolves,
+    // * call onExit / process.exit ourselves, since exit_with_live_runtime()
+    //   prevented Emscripten from calling it normally.
+    resolveGlobalSymbol = function (name, direct = false) {
+        const orig = origResolveGlobalSymbol(name, direct);
+        if (name === "main") {
+            const main = WebAssembly.promising(orig.sym);
+            orig.sym = (...args) => {
+                (async () => {
+                    const ret = await main(...args);
+                    process?.exit?.(ret);
+                })();
+                _emscripten_exit_with_live_runtime();
+            };
+        }
+        return orig;
+    };
+})
+
+__attribute__((constructor)) void _emscripten_promising_main(void) {
+    _emscripten_promising_main_js();
+}
+
+
+#define IOVEC_T_BUF_OFFSET 0
+#define IOVEC_T_BUF_LEN_OFFSET 4
+#define IOVEC_T_SIZE 8
+_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET,
+               "Unexpected __wasi_iovec_t layout");
+_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET,
+               "Unexpected __wasi_iovec_t layout");
+_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE,
+               "Unexpected __wasi_iovec_t layout");
+
+// If the stream has a readAsync handler, read to buffer defined in iovs, write
+// number of bytes read to *nread, and return a promise that resolves to the
+// errno. Otherwise, return null.
+EM_JS_MACROS(__externref_t, __maybe_fd_read_async, (
+    __wasi_fd_t fd,
+    const __wasi_iovec_t *iovs,
+    size_t iovcnt,
+    __wasi_size_t *nread
+), {
+    if (!WebAssembly.promising) {
+        return null;
+    }
+    var stream;
+    try {
+        stream = SYSCALLS.getStreamFromFD(fd);
+    } catch (e) {
+        // If the fd was already closed or never existed, getStreamFromFD()
+        // raises. We'll let fd_read_orig() handle setting errno.
+        return null;
+    }
+    if (!stream.stream_ops.readAsync) {
+        // Not an async device. Fall back to __wasi_fd_read_orig().
+        return null;
+    }
+    return (async () => {
+        // This is the same as libwasi.js fd_read() and doReadv() except we use
+        // readAsync and we await it.
+        // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331
+        // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197
+        try {
+            var ret = 0;
+            for (var i = 0; i < iovcnt; i++) {
+                var ptr = HEAP32[(iovs + IOVEC_T_BUF_OFFSET)/4];
+                var len = HEAP32[(iovs + IOVEC_T_BUF_LEN_OFFSET)/4];
+                iovs += IOVEC_T_SIZE;
+                var curr = await stream.stream_ops.readAsync(stream, HEAP8, 
ptr, len);
+                if (curr < 0) return -1;
+                ret += curr;
+                if (curr < len) break; // nothing more to read
+            }
+            HEAP32[nread/4] = ret;
+            return 0;
+        } catch (e) {
+            if (e.name !== 'ErrnoError') {
+                throw e;
+            }
+            return e.errno;
+        }
+    })();
+};
+);
+
+// Bind original fd_read syscall to __wasi_fd_read_orig().
+__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
+                                   size_t iovs_len, __wasi_size_t *nread)
+    __attribute__((__import_module__("wasi_snapshot_preview1"),
+                   __import_name__("fd_read"), __warn_unused_result__));
+
+// Take a promise that resolves to __wasi_errno_t, suspend until it resolves,
+// and return the resolved value.
+EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), {
+    return p;
+}
+if (WebAssembly.Suspending) {
+    __block_for_errno = new WebAssembly.Suspending(__block_for_errno);
+}
+)
+
+// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned
+// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno
+// to get the result.
+__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
+                              size_t iovs_len, __wasi_size_t *nread) {
+  __externref_t p = __maybe_fd_read_async(fd, iovs, iovs_len, nread);
+  if (__builtin_wasm_ref_is_null_extern(p)) {
+    return __wasi_fd_read_orig(fd, iovs, iovs_len, nread);
+  }
+  __wasi_errno_t res = __block_for_errno(p);
+  return res;
+}
diff --git a/Tools/wasm/emscripten/__main__.py 
b/Tools/wasm/emscripten/__main__.py
index e552f6b680da9d..b25cbb01dedd31 100644
--- a/Tools/wasm/emscripten/__main__.py
+++ b/Tools/wasm/emscripten/__main__.py
@@ -274,10 +274,20 @@ def configure_emscripten_python(context, working_dir):
                 REALPATH=abs_path
             fi
 
+            # Before node 24, --experimental-wasm-jspi uses different API,
+            # After node 24 JSPI is on by default.
+            ARGS=$({host_runner} -e "$(cat <<"EOF"
+            const major_version = 
Number(process.version.split(".")[0].slice(1));
+            if (major_version === 24) {{
+                process.stdout.write("--experimental-wasm-jspi");
+            }}
+            EOF
+            )")
+
             # We compute our own path, not following symlinks and pass it in 
so that
             # node_entry.mjs can set sys.executable correctly.
             # Intentionally allow word splitting on NODEFLAGS.
-            exec {host_runner} $NODEFLAGS {node_entry} 
--this-program="$($REALPATH "$0")" "$@"
+            exec {host_runner} $NODEFLAGS $ARGS {node_entry} 
--this-program="$($REALPATH "$0")" "$@"
             """
         )
     )

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: arch...@mail-archive.com

Reply via email to