https://github.com/python/cpython/commit/d118bc061b4f7ea916bb25f19c5c7f8e1923fbc7 commit: d118bc061b4f7ea916bb25f19c5c7f8e1923fbc7 branch: 3.14 author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com> committer: ambv <luk...@langa.pl> date: 2025-07-22T11:51:02+02:00 summary:
[3.14] gh-124621: Emscripten: Add support for async input devices (GH-136822) (GH-136935) This is useful for implementing proper `input()`. It requires the JavaScript engine to support the wasm JSPI spec which is now stage 4. It is supported on Chrome since version 137 and on Firefox and node behind a flag. We override the `__wasi_fd_read()` syscall with our own variant that checks for a readAsync operation. If it has it, we use our own async variant of `fd_read()`, otherwise we use the original `fd_read()`. We also add a variant of `FS.createDevice()` called `FS.createAsyncInputDevice()`. Finally, if JSPI is available, we wrap the `main()` symbol with `WebAssembly.promising()` so that we can stack switch from `fd_read()`. If JSPI is not available, attempting to read from an AsyncInputDevice will raise an `OSError`. (cherry picked from commit 7ae4749d064bd49b0dd96172fee20c1f1678d9e9) Co-authored-by: Hood Chatham <roberthoodchat...@gmail.com> files: A Lib/test/test_capi/test_emscripten.py M Modules/_testinternalcapi.c M Python/emscripten_syscalls.c M Tools/wasm/emscripten/__main__.py diff --git a/Lib/test/test_capi/test_emscripten.py b/Lib/test/test_capi/test_emscripten.py new file mode 100644 index 00000000000000..272d9a10ceb950 --- /dev/null +++ b/Lib/test/test_capi/test_emscripten.py @@ -0,0 +1,25 @@ +import unittest +from test.support import is_emscripten + +if not is_emscripten: + raise unittest.SkipTest("Emscripten-only test") + +from _testinternalcapi import emscripten_set_up_async_input_device +from pathlib import Path + + +class EmscriptenAsyncInputDeviceTest(unittest.TestCase): + def test_emscripten_async_input_device(self): + jspi_supported = emscripten_set_up_async_input_device() + p = Path("/dev/blah") + self.addCleanup(p.unlink) + if not jspi_supported: + with open(p, "r") as f: + self.assertRaises(OSError, f.readline) + return + + with open(p, "r") as f: + for _ in range(10): + self.assertEqual(f.readline().strip(), "ab") + 
self.assertEqual(f.readline().strip(), "fi") + self.assertEqual(f.readline().strip(), "xy") diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 8027f0015c7409..f84cf1a4263a2d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2346,6 +2346,37 @@ incref_decref_delayed(PyObject *self, PyObject *op) Py_RETURN_NONE; } +#ifdef __EMSCRIPTEN__ +#include "emscripten.h" + +EM_JS(int, emscripten_set_up_async_input_device_js, (void), { + let idx = 0; + const encoder = new TextEncoder(); + const bufs = [ + encoder.encode("ab\n"), + encoder.encode("fi\n"), + encoder.encode("xy\n"), + ]; + function sleep(t) { + return new Promise(res => setTimeout(res, t)); + } + FS.createAsyncInputDevice("/dev", "blah", async () => { + await sleep(5); + return bufs[(idx ++) % 3]; + }); + return !!WebAssembly.promising; +}); + +static PyObject * +emscripten_set_up_async_input_device(PyObject *self, PyObject *Py_UNUSED(ignored)) { + if (emscripten_set_up_async_input_device_js()) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} +#endif + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2448,6 +2479,9 @@ static PyMethodDef module_functions[] = { {"is_static_immortal", is_static_immortal, METH_O}, {"incref_decref_delayed", incref_decref_delayed, METH_O}, GET_NEXT_DICT_KEYS_VERSION_METHODDEF +#ifdef __EMSCRIPTEN__ + {"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS}, +#endif {NULL, NULL} /* sentinel */ }; diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c index bb80f979420ec1..886262acbc6810 100644 --- a/Python/emscripten_syscalls.c +++ b/Python/emscripten_syscalls.c @@ -37,3 +37,185 @@ EM_JS(int, __syscall_umask_js, (int mask), { int __syscall_umask(int mask) { return __syscall_umask_js(mask); } + +#include <wasi/api.h> +#include <errno.h> +#undef errno + +// Variant of EM_JS that 
does C preprocessor substitution on the body +#define EM_JS_MACROS(ret, func_name, args, body...) \ + EM_JS(ret, func_name, args, body) + +EM_JS_MACROS(void, _emscripten_promising_main_js, (void), { + // Define FS.createAsyncInputDevice(). This is quite similar to + // FS.createDevice() defined here: + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642 + // but instead of returning one byte at a time, the input() function should + // return a Uint8Array. This makes the handler code simpler, the + // `createAsyncInputDevice` simpler, and everything faster. + FS.createAsyncInputDevice = function(parent, name, input) { + parent = typeof parent == 'string' ? parent : FS.getPath(parent); + var path = PATH.join2(parent, name); + var mode = FS_getMode(true, false); + FS.createDevice.major ||= 64; + var dev = FS.makedev(FS.createDevice.major++, 0); + async function getDataBuf() { + var buf; + try { + buf = await input(); + } catch (e) { + throw new FS.ErrnoError(EIO); + } + if (!buf?.byteLength) { + throw new FS.ErrnoError(EAGAIN); + } + ops._dataBuf = buf; + } + + var ops = { + _dataBuf: new Uint8Array(0), + open(stream) { + stream.seekable = false; + }, + async readAsync(stream, buffer, offset, length, pos /* ignored */) { + buffer = buffer.subarray(offset, offset + length); + if (!ops._dataBuf.byteLength) { + await getDataBuf(); + } + var toRead = Math.min(ops._dataBuf.byteLength, buffer.byteLength); + buffer.set(ops._dataBuf.subarray(0, toRead)); // slice the source: set() throws RangeError if it is longer than the target + buffer = buffer.subarray(toRead); + ops._dataBuf = ops._dataBuf.subarray(toRead); + if (toRead) { + stream.node.atime = Date.now(); + } + return toRead; + }, + }; + FS.registerDevice(dev, ops); + return FS.mkdev(path, mode, dev); + }; + if (!WebAssembly.promising) { + // No stack switching support =( + return; + } + const origResolveGlobalSymbol = resolveGlobalSymbol; + if (!Module.onExit && globalThis.process?.exit) { // a bare `process` throws ReferenceError in browsers + Module.onExit = (code) => process.exit(code); + } + // * wrap the main 
symbol with WebAssembly.promising, + // * call exit_with_live_runtime() to prevent emscripten from shutting down + // the runtime before the promise resolves, + // * call onExit / process.exit ourselves, since exit_with_live_runtime() + // prevented Emscripten from calling it normally. + resolveGlobalSymbol = function (name, direct = false) { + const orig = origResolveGlobalSymbol(name, direct); + if (name === "main") { + const main = WebAssembly.promising(orig.sym); + orig.sym = (...args) => { + (async () => { + const ret = await main(...args); + globalThis.process?.exit?.(ret); // a bare `process` throws ReferenceError in browsers + })(); + _emscripten_exit_with_live_runtime(); + }; + } + return orig; + }; +}) + +__attribute__((constructor)) void _emscripten_promising_main(void) { + _emscripten_promising_main_js(); +} + + +#define IOVEC_T_BUF_OFFSET 0 +#define IOVEC_T_BUF_LEN_OFFSET 4 +#define IOVEC_T_SIZE 8 +_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET, + "Unexpected __wasi_iovec_t layout"); +_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET, + "Unexpected __wasi_iovec_t layout"); +_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE, + "Unexpected __wasi_iovec_t layout"); + +// If the stream has a readAsync handler, read to buffer defined in iovs, write +// number of bytes read to *nread, and return a promise that resolves to the +// errno. Otherwise, return null. +EM_JS_MACROS(__externref_t, __maybe_fd_read_async, ( + __wasi_fd_t fd, + const __wasi_iovec_t *iovs, + size_t iovcnt, + __wasi_size_t *nread +), { + if (!WebAssembly.promising) { + return null; + } + var stream; + try { + stream = SYSCALLS.getStreamFromFD(fd); + } catch (e) { + // If the fd was already closed or never existed, getStreamFromFD() + // raises. We'll let fd_read_orig() handle setting errno. + return null; + } + if (!stream.stream_ops.readAsync) { + // Not an async device. Fall back to __wasi_fd_read_orig(). 
+ return null; + } + return (async () => { + // This is the same as libwasi.js fd_read() and doReadv() except we use + // readAsync and we await it. + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331 + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197 + try { + var ret = 0; + for (var i = 0; i < iovcnt; i++) { + var ptr = HEAP32[(iovs + IOVEC_T_BUF_OFFSET)/4]; + var len = HEAP32[(iovs + IOVEC_T_BUF_LEN_OFFSET)/4]; + iovs += IOVEC_T_SIZE; + var curr = await stream.stream_ops.readAsync(stream, HEAP8, ptr, len); + if (curr < 0) return -1; + ret += curr; + if (curr < len) break; // nothing more to read + } + HEAP32[nread/4] = ret; + return 0; + } catch (e) { + if (e.name !== 'ErrnoError') { + throw e; + } + return e.errno; + } + })(); +}; +); + +// Bind original fd_read syscall to __wasi_fd_read_orig(). +__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs, + size_t iovs_len, __wasi_size_t *nread) + __attribute__((__import_module__("wasi_snapshot_preview1"), + __import_name__("fd_read"), __warn_unused_result__)); + +// Take a promise that resolves to __wasi_errno_t and suspend until it resolves, +// get the output. +EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), { + return p; +} +if (WebAssembly.Suspending) { + __block_for_errno = new WebAssembly.Suspending(__block_for_errno); +} +) + +// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned +// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno +// to get the result. 
+__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs, + size_t iovs_len, __wasi_size_t *nread) { + __externref_t p = __maybe_fd_read_async(fd, iovs, iovs_len, nread); + if (__builtin_wasm_ref_is_null_extern(p)) { + return __wasi_fd_read_orig(fd, iovs, iovs_len, nread); + } + __wasi_errno_t res = __block_for_errno(p); + return res; +} diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index e552f6b680da9d..b25cbb01dedd31 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -274,10 +274,20 @@ def configure_emscripten_python(context, working_dir): REALPATH=abs_path fi + # Before node 24, --experimental-wasm-jspi uses different API, + # After node 24 JSPI is on by default. + ARGS=$({host_runner} -e "$(cat <<"EOF" + const major_version = Number(process.version.split(".")[0].slice(1)); + if (major_version === 24) {{ + process.stdout.write("--experimental-wasm-jspi"); + }} + EOF + )") + # We compute our own path, not following symlinks and pass it in so that # node_entry.mjs can set sys.executable correctly. # Intentionally allow word splitting on NODEFLAGS. - exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@" + exec {host_runner} $NODEFLAGS $ARGS {node_entry} --this-program="$($REALPATH "$0")" "$@" """ ) ) _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: arch...@mail-archive.com