[gentoo-commits] proj/portage:master commit in: lib/portage/, lib/portage/tests/process/
commit: c556fee09b6b103a9bb8c2afa1c7a7ef25022598 Author: Zac Medico gentoo org> AuthorDate: Thu Feb 1 05:48:54 2024 + Commit: Zac Medico gentoo org> CommitDate: Thu Feb 1 16:10:28 2024 + URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=c556fee0 process.spawn: Add returnproc parameter In order to migrate away from unsafe os.fork() usage in threaded processes (https://github.com/python/cpython/issues/84559), add a returnproc parameter that is similar to returnpid, which causes spawn to return a single Process object instead of a list of pids. The Process API is a subset of asyncio.subprocess.Process. The returnproc parameter conflicts with the logfile parameter, since the caller is expected to use the fd_pipes parameter to implement logging (this was also true for the returnpid parameter). In the future, spawn will return objects of a different type but with a compatible interface to Process, in order to encapsulate implementation-dependent objects like multiprocessing.Process which are designed to manage the process lifecycle and need to persist until it exits. Trigger a UserWarning when the returnpid parameter is used, in order to encourage migration to returnproc (do not use DeprecationWarning since it is hidden by default). This warning will be temporarily suppressed for portage internals, until they finish migrating to returnproc. There are probably very few if any external consumers of spawn with the returnpid parameter, so it seems safe to move quickly with this deprecation. Bug: https://bugs.gentoo.org/916566 Signed-off-by: Zac Medico gentoo.org> lib/portage/process.py | 84 ++ lib/portage/tests/process/meson.build | 1 + lib/portage/tests/process/test_spawn_returnproc.py | 39 ++ 3 files changed, 124 insertions(+) diff --git a/lib/portage/process.py b/lib/portage/process.py index 71750a715f..6ec52efc4a 100644 --- a/lib/portage/process.py +++ b/lib/portage/process.py @@ -15,6 +15,7 @@ import subprocess import sys import traceback import os as _os +import warnings from dataclasses import dataclass from functools import lru_cache @@ -27,6 +28,7 @@ import portage portage.proxy.lazyimport.lazyimport( globals(), +"portage.util._eventloop.global_event_loop:global_event_loop", "portage.util:dump_traceback,writemsg,writemsg_level", ) @@ -277,12 +279,78 @@ def calc_env_stats(env) -> EnvStats: env_too_large_warnings = 0 +class Process: +""" +An object that wraps OS processes created by spawn. +In the future, spawn will return objects of a different type +but with a compatible interface to this class, in order +to encapsulate implementation-dependent objects like +multiprocessing.Process which are designed to manage +the process lifecycle and need to persist until it exits. +""" + +def __init__(self, pid): +self.pid = pid +self.returncode = None +self._exit_waiters = [] + +def __repr__(self): +return f"<{self.__class__.__name__} {self.pid}>" + +async def wait(self): +""" +Wait for the child process to terminate. + +Set and return the returncode attribute. +""" +if self.returncode is not None: +return self.returncode + +loop = global_event_loop() +if not self._exit_waiters: +loop._asyncio_child_watcher.add_child_handler(self.pid, self._child_handler) +waiter = loop.create_future() +self._exit_waiters.append(waiter) +return await waiter + +def _child_handler(self, pid, returncode): +if pid != self.pid: +raise AssertionError(f"expected pid {self.pid}, got {pid}") +self.returncode = returncode + +for waiter in self._exit_waiters: +if not waiter.cancelled(): +waiter.set_result(returncode) +self._exit_waiters = None + +def send_signal(self, sig): +"""Send a signal to the process.""" +if self.returncode is not None: +# Skip signalling a process that we know has already died. +return + +try: +os.kill(self.pid, sig) +except ProcessLookupError: +# Suppress the race condition error; bpo-40550. +pass + +def terminate(self): +"""Terminate the process with SIGTERM""" +self.send_signal(signal.SIGTERM) + +def kill(self): +"""Kill the process with SIGKILL""" +self.send_signal(signal.SIGKILL) + + def spawn( mycommand, env=None, opt_name=None, fd_pipes=None, returnpid=False, +returnproc=False, uid=None, gid=None, groups=None, @@ -315,6 +383,9 @@ def spawn( @param returnpid: Return the Process IDs for a successful spawn. NOTE: This requires the caller clean up all the PIDs, otherwise spawn will clean them. @type returnpid: Boolean +
[gentoo-commits] proj/portage:master commit in: lib/portage/, /, lib/portage/tests/process/
commit: 196caf376c5783c9123277711cdbd8e9711de436 Author: Florian Schmaus gentoo org> AuthorDate: Sun Jan 15 18:56:10 2023 + Commit: Sam James gentoo org> CommitDate: Sun Jan 15 22:11:19 2023 + URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=196caf37 lib/portage/process.py: show diag message if exec failed with E2BIG Signed-off-by: Florian Schmaus gentoo.org> Closes: https://github.com/gentoo/portage/pull/978 Signed-off-by: Sam James gentoo.org> NEWS | 2 ++ lib/portage/process.py | 29 lib/portage/tests/process/test_spawn_fail_e2big.py | 40 ++ 3 files changed, 71 insertions(+) diff --git a/NEWS b/NEWS index b149ecc4d..a173795b3 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,8 @@ Features: * cleanups: Use flynt on the codebase to upgrade to Python f-strings everywhere. +* process: Show diagnostic message if exec failed with E2BIG + Bug fixes: * ebuild: the PATH variable exported to ebuilds has been changed: The PATH setting from /etc/profile.env is appended to portage-internal diff --git a/lib/portage/process.py b/lib/portage/process.py index 5ba6c7867..6760f617c 100644 --- a/lib/portage/process.py +++ b/lib/portage/process.py @@ -437,6 +437,35 @@ def spawn( except SystemExit: raise except Exception as e: +if isinstance(e, OSError) and e.errno == errno.E2BIG: +# If exec() failed with E2BIG, then this is +# potentially because the environment variables +# grew to large. The following will gather some +# stats about the environment and print a +# diagnostic message to help identifying the +# culprit. See also +# - https://bugs.gentoo.org/721088 +# - https://bugs.gentoo.org/830187 +def encoded_length(s): +return len(os.fsencode(s)) + +env_size = 0 +env_largest_name = None +env_largest_size = 0 +for env_name, env_value in env.items(): +env_name_size = encoded_length(env_name) +env_value_size = encoded_length(env_value) +# Add two for '=' and the terminating null byte. +total_size = env_name_size + env_value_size + 2 +if total_size > env_largest_size: +env_largest_name = env_name +env_largest_size = total_size +env_size += total_size + +writemsg( +f"ERROR: Executing {mycommand} failed with E2BIG. Child process environment size: {env_size} bytes. Largest environment variable: {env_largest_name} ({env_largest_size} bytes)\n" +) + # We need to catch _any_ exception so that it doesn't # propagate out of this function and cause exiting # with anything other than os._exit() diff --git a/lib/portage/tests/process/test_spawn_fail_e2big.py b/lib/portage/tests/process/test_spawn_fail_e2big.py new file mode 100644 index 0..d3be51670 --- /dev/null +++ b/lib/portage/tests/process/test_spawn_fail_e2big.py @@ -0,0 +1,40 @@ +# Copyright 2023 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +import platform +import tempfile + +from pathlib import Path + +import portage.process + +from portage import shutil +from portage.const import BASH_BINARY +from portage.tests import TestCase + + +class SpawnE2bigTestCase(TestCase): +def testSpawnE2big(self): +if platform.system() != "Linux": +self.skipTest("not Linux") + +env = dict() +env["VERY_LARGE_ENV_VAR"] = "X" * 1024 * 256 + +tmpdir = tempfile.mkdtemp() +try: +logfile = tmpdir / Path("logfile") +echo_output = "Should never appear" +retval = portage.process.spawn( +[BASH_BINARY, "-c", "echo", echo_output], env=env, logfile=logfile +) + +with open(logfile) as f: +logfile_content = f.read() +self.assertIn( +"Largest environment variable: VERY_LARGE_ENV_VAR (262164 bytes)", +logfile_content, +) +self.assertEqual(retval, 1) +finally: +shutil.rmtree(tmpdir)
[gentoo-commits] proj/portage:master commit in: lib/portage/, lib/portage/tests/process/
commit: 6b08846736ebf8a224f8aad2a5b17baf66fec1e0 Author: Zac Medico gentoo org> AuthorDate: Sat Aug 8 02:19:31 2020 + Commit: Zac Medico gentoo org> CommitDate: Sun Aug 9 00:48:12 2020 + URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=6b088467 Add cached portage.getpid() function Since getpid is a syscall, cache results, and update them via an after fork hook. Signed-off-by: Zac Medico gentoo.org> lib/portage/__init__.py | 16 lib/portage/tests/process/test_AsyncFunction.py | 24 2 files changed, 40 insertions(+) diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py index 916c93510..4d4b590a8 100644 --- a/lib/portage/__init__.py +++ b/lib/portage/__init__.py @@ -14,6 +14,7 @@ try: if not hasattr(errno, 'ESTALE'): # ESTALE may not be defined on some systems, such as interix. errno.ESTALE = -1 + import multiprocessing.util import re import types import platform @@ -368,6 +369,21 @@ _internal_caller = False _sync_mode = False +class _ForkWatcher: + @staticmethod + def hook(_ForkWatcher): + _ForkWatcher.current_pid = _os.getpid() + +_ForkWatcher.hook(_ForkWatcher) + +multiprocessing.util.register_after_fork(_ForkWatcher, _ForkWatcher.hook) + +def getpid(): + """ + Cached version of os.getpid(). ForkProcess updates the cache. + """ + return _ForkWatcher.current_pid + def _get_stdin(): """ Buggy code in python's multiprocessing/process.py closes sys.stdin diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py index 55857026d..3b360e02f 100644 --- a/lib/portage/tests/process/test_AsyncFunction.py +++ b/lib/portage/tests/process/test_AsyncFunction.py @@ -3,6 +3,7 @@ import sys +import portage from portage import os from portage.tests import TestCase from portage.util._async.AsyncFunction import AsyncFunction @@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase): def testAsyncFunctionStdin(self): loop = asyncio._wrap_loop() loop.run_until_complete(self._testAsyncFunctionStdin(loop)) + + def _test_getpid_fork(self): + """ + Verify that portage.getpid() cache is updated in a forked child process. + """ + loop = asyncio._wrap_loop() + proc = AsyncFunction(scheduler=loop, target=portage.getpid) + proc.start() + proc.wait() + self.assertEqual(proc.pid, proc.result) + + def test_getpid_fork(self): + self._test_getpid_fork() + + def test_getpid_double_fork(self): + """ + Verify that portage.getpid() cache is updated correctly after + two forks. + """ + loop = asyncio._wrap_loop() + proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork) + proc.start() + self.assertEqual(proc.wait(), 0)