Bug#803013: systemd should not destroy application created cgroups
found 803013 241-7~deb10u1 thanks Now updating my machines to buster, I see this issue is still present, now in systemd version 241-7~deb10u1. The same steps can reproduce: - Set up cgroups e.g. adding TaskIDs to /sys/fs/cgroup/cpu/DIR/tasks files. (I use cgrulesengd from package cgroup-tools, but any other use of cgroups is equally affected.) - Then when you use systemd commands: systemctl daemon-reload systemctl start anacron you will see your cgroups (your tasks files) becoming empty. Command daemon-reload seems to happen within "apt-get dist-upgrade" sequences, and "start anacron" happens nightly. (Some other systemd commands may also affect.) and the "same" fix applies: new patch file below, for changed sources. (Funny how this bug is not getting fixed, in four years...) Thanks, Paul -- Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia I support NTEU members taking a stand for workplace rights in the face of poorly-run change management. Visit www.nteu.org.au/sydney to learn more. diff -r -U18 a-241/src/basic/cgroup-util.c b-241/src/basic/cgroup-util.c --- a-241/src/basic/cgroup-util.c 2019-02-14 21:11:58.0 +1100 +++ b-241/src/basic/cgroup-util.c 2019-09-12 08:53:43.900643247 +1000 @@ -368,36 +368,52 @@ int cg_migrate( const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags) { bool done = false; _cleanup_set_free_ Set *s = NULL; int r, ret = 0; pid_t my_pid; assert(cfrom); assert(pfrom); assert(cto); assert(pto); +/* + * PSz 25 Oct 2015 + * An empty "to" path is surely wrong + * (do not annoy cgroups that are not ours). + * PSz 23 Jul 2017 + * Cannot(?) happen anymore, see: + * cg_migrate_recursive_fallback() + * cg_migrate_everywhere() + * below... log if it does! 
+ */ +if (!strlen(pto)) { +log_warning("PSz debug: cg_migrate skip from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); +return ret; +} +/* log_warning("PSz debug: cg_migrate do from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ + s = set_new(NULL); if (!s) return -ENOMEM; my_pid = getpid_cached(); do { _cleanup_fclose_ FILE *f = NULL; pid_t pid = 0; done = true; r = cg_enumerate_processes(cfrom, pfrom, ); if (r < 0) { if (ret >= 0 && r != -ENOENT) return r; return ret; } @@ -509,36 +525,52 @@ CGroupFlags flags) { int r; assert(cfrom); assert(pfrom); assert(cto); assert(pto); r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); if (r < 0) { char prefix[strlen(pto) + 1]; /* This didn't work? Then let's try all prefixes of the destination */ PATH_FOREACH_PREFIX(prefix, pto) { int q; +/* + * PSz 23 Jul 2017 + * Skip an empty ("") prefix path: surely wrong, + * do not annoy cgroups that are not ours. + * Other comments: + * - Why this "did not work so try something else"? + * - Maybe should have used PATH_FOREACH_PREFIX_MORE + * for cleaner, more compact code. + * - Should check cg_attach_fallback() also, and maybe + * review all other uses of PATH_FOREACH_PREFIX. + */ +if (!strlen(prefix)) { +/* log_warning("PSz debug: cg_migrate_recursive_fallback skip from (%s)%s to (%s)[EMPTY prefix of %s]", cfrom, pfrom, cto, pto); */ +continue; +} + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); if (q >= 0) return q; } } return r; } static const char *controller_to_dirname(const char *controller) { const char *e; assert(controller); /* Converts a controller name to the directory name below * /sys/fs/cgroup/ we want to mount it to. Effectively, this * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ @@ -2233,38 +2265,46 @@ if (q > 0)
Bug#803013: systemd should not destroy application created cgroups
A patch below, functionally identical to my previous. But this seems neater, showing the intent more clearly: clearer that this is a "true" bug in systemd. Cheers, Paul -- Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia diff -r -U23 a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c --- a/src/basic/cgroup-util.c 2017-07-20 09:32:36.0 +1000 +++ b/src/basic/cgroup-util.c 2017-07-23 13:51:28.0 +1000 @@ -363,46 +363,60 @@ return r; } return ret; } int cg_migrate( const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags) { bool done = false; _cleanup_set_free_ Set *s = NULL; int r, ret = 0; pid_t my_pid; assert(cfrom); assert(pfrom); assert(cto); assert(pto); +/* + * PSz 25 Oct 2015 + * An empty "to" path is surely wrong + * (do not annoy cgroups that are not ours). + * PSz 23 Jul 2017 + * Cannot happen anymore(?), see cg_migrate_recursive_fallback() + * below... log if it does! + */ +if (!strlen(pto)) { +log_warning("PSz debug: cg_migrate skip from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); +return ret; +} +/* log_warning("PSz debug: cg_migrate do from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ + s = set_new(NULL); if (!s) return -ENOMEM; my_pid = getpid(); do { _cleanup_fclose_ FILE *f = NULL; pid_t pid = 0; done = true; r = cg_enumerate_processes(cfrom, pfrom, ); if (r < 0) { if (ret >= 0 && r != -ENOENT) return r; return ret; } while ((r = cg_read_pid(f, )) > 0) { /* This might do weird stuff if we aren't a * single-threaded program. However, we @@ -504,46 +518,62 @@ int cg_migrate_recursive_fallback( const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags) { int r; assert(cfrom); assert(pfrom); assert(cto); assert(pto); r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); if (r < 0) { char prefix[strlen(pto) + 1]; /* This didn't work? 
Then let's try all prefixes of the destination */ PATH_FOREACH_PREFIX(prefix, pto) { int q; +/* + * PSz 23 Jul 2017 + * Skip an empty ("") prefix path: surely wrong, + * do not annoy cgroups that are not ours. + * Other comments: + * - Why this "did not work so try something else"? + * - Maybe should have used PATH_FOREACH_PREFIX_MORE + * for cleaner, more compact code. + * - Should check cg_attach_fallback() also, and maybe + * review all other uses of PATH_FOREACH_PREFIX. + */ +if (!strlen(prefix)) { +/* log_warning("PSz debug: cg_migrate_recursive_fallback skip from (%s)%s to (%s) (empty prefix of %s)", cfrom, pfrom, cto, pto); */ +continue; +} + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); if (q >= 0) return q; } } return r; } static const char *controller_to_dirname(const char *controller) { const char *e; assert(controller); /* Converts a controller name to the directory name below * /sys/fs/cgroup/ we want to mount it to. Effectively, this * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ e = startswith(controller, "name="); if (e) return e;
Bug#803013: systemd should not destroy application created cgroups
Dear Martin, You can create a drop-in like /etc/systemd/system/cgred.service.d/delegate.conf with [Service] Delegate=yes I tried that, but did not help; also did not help to do similar with file named /etc/systemd/system/cgrulesengd.service.d/delegate.conf matching name of running daemon. Anything else you suggest to try? My guess is that the above could not possibly help. It may tell systemd that cgrulesengd will do weird things and to leave alone, but it does not tell what to leave alone. How could systemd guess that cgrulesengd will write things into /sys/fs/cgroup/cpu,cpuacct/ and to leave those? I think this issue is a "true bug" in systemd, that it should not use empty (zero-length) string pto "path to" values in cg_migrate() calls: it seems an oversight to have assert(pto) but no assert(strlen(pto)) sanity checks in the code. Maybe I should check where calls with empty strings originate from ... Cheers, Paul -- Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Hello Paul, Paul Szabo [2017-07-21 18:25 +1000]: > Where would I set that? My cgrulesengd (from package cgroup-tools) is > started from /etc/init.d/cgred, not from some systemd *.service thing. You can create a drop-in like /etc/systemd/system/cgred.service.d/delegate.conf with [Service] Delegate=yes > > https://anonscm.debian.org/cgit/pkg-systemd/systemd.git/tree/debian/patches/debian/cgroup-don-t-trim-cgroup-trees-created-by-someone-el.patch > No, it is not (and was never) sufficient: that is a different bug. OK, thanks for the heads-up. Martin
Bug#803013: systemd should not destroy application created cgroups
Dear Martin, ... the official and documented mechanism is to set "Delegate=yes" in a unit which wants to do its own cgroup management. Everything else is just a hack prone to bitrotting... Where would I set that? My cgrulesengd (from package cgroup-tools) is started from /etc/init.d/cgred, not from some systemd *.service thing. My cgrulesengd sets things under /sys/fs/cgroup/cpu,cpuacct/ and I do not see that mentioned in any systemd config-type files. (Distressing how this bug did not get fixed in two years...) Like it apparently happened to the previous patch that we've been carrying for three years already: https://anonscm.debian.org/cgit/pkg-systemd/systemd.git/tree/debian/patches/debian/cgroup-don-t-trim-cgroup-trees-created-by-someone-el.patch Seems this is not sufficient any more? You mean file debian/patches/debian/cgroup-don-t-trim-cgroup-trees-created-by-someone-el.patch within systemd_232-25.debian.tar.xz or already in say systemd_215-17+deb8u2.debian.tar.xz No, it is not (and was never) sufficient: that is a different bug. Thanks, Paul -- Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Hello all, Paul Szabo [2017-07-21 13:07 +1000]: > Now updating my machines to stretch, I see this issue is still present, It's an uphill battle indeed - the official and documented mechanism is to set "Delegate=yes" in a unit which wants to do its own cgroup management. Everything else is just a hack prone to bitrotting... > (Distressing how this bug did not get fixed in two years...) Like it apparently happened to the previous patch that we've been carrying for three years already: https://anonscm.debian.org/cgit/pkg-systemd/systemd.git/tree/debian/patches/debian/cgroup-don-t-trim-cgroup-trees-created-by-someone-el.patch Seems this is not sufficient any more? Martin
Bug#803013: systemd should not destroy application created cgroups
Now updating my machines to stretch, I see this issue is still present, now in systemd version 232-25. The same steps can reproduce: - Set up cgroups e.g. adding TaskIDs to /sys/fs/cgroup/cpu/DIR/tasks files. (I use cgrulesengd from package cgroup-tools, but any other use of cgroups is equally affected.) - Then when you use systemd commands: systemctl daemon-reload systemctl start anacron you will see your cgroups (your tasks files) becoming empty. Command daemon-reload seems to happen within "apt-get dist-upgrade" sequences, and "start anacron" happens nightly. (Some other systemd commands may also affect.) and the "same" fix applies: new patch file below, for changed sources. Please update the list of versions affected by the bug. Maybe you could set the severity back to critical: it does break unrelated software in a default setup. (Distressing how this bug did not get fixed in two years...) Thanks, Paul -- Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia diff -r -U17 a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c --- a/src/basic/cgroup-util.c 2017-07-20 09:32:36.0 +1000 +++ b/src/basic/cgroup-util.c 2017-07-20 09:41:31.0 +1000 @@ -369,34 +369,44 @@ int cg_migrate( const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags) { bool done = false; _cleanup_set_free_ Set *s = NULL; int r, ret = 0; pid_t my_pid; assert(cfrom); assert(pfrom); assert(cto); assert(pto); +/* + * PSz 25 Oct 2015 + * An empty "to" path is surely wrong (do not annoy cgroups that not ours) + */ +if (!strlen(pto)) { +/* log_warning("Debug: cg_migrate skip from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ +return ret; +} +/* log_warning("Debug: cg_migrate do from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ + s = set_new(NULL); if (!s) return -ENOMEM; my_pid = getpid(); do { _cleanup_fclose_ FILE *f = NULL; pid_t pid = 0; done = true; r = cg_enumerate_processes(cfrom, 
pfrom, &f); if (r < 0) { if (ret >= 0 && r != -ENOENT) return r; return ret;
Bug#803013: systemd should not destroy application created cgroups
Package: systemd Version: 215-17+deb8u4 Followup-For: Bug #803013 Dear Maintainer, It seems that an option Delegate=yes was introduced upstream, but this does not seem to be available in Jessie. The current mode of operation for systemd is to destroy all the cgroup subtree of a service in e.g. memory controller. I use SLURM scheduler that utilizes cgroups to maintain resource limits and every time when I reload configuration followed by a restart of any service unit, my memory:/ controller gets cleaned and all processes are moved to the root (interestingly, the tree is still there, but tasks files are emptied). Interesting fact is that this was apparently fixed before, here is an entry from a package log: systemd (215-13) unstable; urgency=medium [ Christian Seiler ] * core: Don't migrate PIDs for units that may contain subcgroups. This stops messing up lxc/libvirt/other custom cgroup layouts after daemon-reload. (Closes: #777164) And now we are back again facing the same problems. Best, Tomasz -- Package-specific info: -- System Information: Debian Release: 8.4 APT prefers stable-updates APT policy: (500, 'stable-updates'), (500, 'stable') Architecture: amd64 (x86_64) Foreign Architectures: i386 Kernel: Linux 3.16.0-4-amd64 (SMP w/12 CPU cores) Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/bash Init: systemd (via /run/systemd/system) Versions of packages systemd depends on: ii acl 2.2.52-2 ii adduser 3.113+nmu3 ii initscripts 2.88dsf-59 ii libacl1 2.2.52-2 ii libaudit1 1:2.4-1+b1 ii libblkid1 2.25.2-6 ii libc6 2.19-18+deb8u4 ii libcap2 1:2.24-8 ii libcap2-bin 1:2.24-8 ii libcryptsetup4 2:1.6.6-5 ii libgcrypt20 1.6.3-2+deb8u1 ii libkmod2 18-3 ii liblzma5 5.1.1alpha+20120614-2+b3 ii libpam0g 1.1.8-3.1+deb8u1+b1 ii libselinux1 2.3-2 ii libsystemd0 215-17+deb8u4 ii mount 2.25.2-6 ii sysv-rc 2.88dsf-59 ii udev 215-17+deb8u4 ii util-linux 2.25.2-6 Versions of packages systemd recommends: ii dbus 1.8.20-0+deb8u1 ii libpam-systemd
215-17+deb8u4 Versions of packages systemd suggests: pn systemd-ui -- Configuration Files: /etc/systemd/journald.conf changed: [Journal] ForwardToWall=no -- no debconf information
Bug#803013: systemd should not destroy application created cgroups
tags 803013 - fixed-upstream usertags 803013 - status-closed thanks I wrote: Please re-do your tags, or may I set tags myself? and received no response. I am now trying to do so myself; please see the discussion within the bug report for the reasons. Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of Sydney Australia
Bug#803013: systemd should not destroy application created cgroups
Dear Julian, Thank you for the various pointers. > You set Delegate=yes for the unit ... That does not seem available yet in jessie. > The kernel cgroups implementation moved or is moving to a > single-writer, single-hierarchy implementation ... It does not seem to have moved yet in jessie. > ... user space daemon arbiter. systemd implements such an arbiter. It should permit nominating some other arbiter, and does not seem to have any plans to do that. > While the kernel probably still allows for multiple hierarchies in > order to not break the user space interface, they should not be used > anymore. Systemd has not yet implemented the cgrules functionality I require. > [Delegate=yes] is a mid-term workaround, and will be dropped ... OK. --- What should I use now for cgrules, and what in the future? Why is the conflict between the systemd and cgroup-tools packages not explicit in Debian packaging? --- About the patch I proposed. It seems wrong to pass empty strings. The code contains assert(pto) etc to protect against NULL pointers, seems an oversight to not have assert(strlen(pto)) also. My patch handles the case of empty strings (though does not go deep enough to find their origin). Would not my patch make systemd more robust? Thanks, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
On Fri, Nov 13, 2015 at 11:36:30AM +1100, paul.sz...@sydney.edu.au wrote: > Progress? For my efforts upstream, I got the comment: > > > Sorry, but systemd implements a single-writer cgroup logic (as > > requested by the kernel maintainers), and hence takes possesion of the > > whole tree. ... > > I observe it only uses the /sys/fs/cgroup/systemd tree. > (I wonder about the "req by kernel" comment.) See the end of the email. > > > ... If you want your own cgroup tree to manage, use the "Delegate=yes" > > feature in a service or scope, but otherwise systemd is in exclusive > > control. > > Do we have that? Can we have it everywhere? Can we have it by default, > should not it be so? No. You set Delegate=yes for the unit which manages its own cgroups hierarchy beneath the one designated by systemd. > > > Sorry, but multiple access to the cgroup tree is simply not supported. > > Not if we let systemd take over the world. This is not related to systemd. The kernel cgroups implementation moved or is moving to a single-writer, single-hierarchy implementation with a user space daemon arbiter. systemd implements such an arbiter. http://www.linuxfoundation.org/news-media/blogs/browse/2013/08/all-about-linux-kernel-cgroup%E2%80%99s-redesign https://wiki.freedesktop.org/www/Software/systemd/ControlGroupInterface/ https://lwn.net/Articles/555920/ While the kernel probably still allows for multiple hierarchies in order to not break the user space interface, they should not be used anymore. The single hierarchy changes were discussed in 2012: https://lwn.net/Articles/484251/ and introduced between 2012 and 2013. Complaining about that 2 years later is a bit late, although it would not have changed anything back then either, as the multiple-writer approach is fundamentally broken. -- Julian Andres Klode - Debian Developer, Ubuntu Member See http://wiki.debian.org/JulianAndresKlode and http://jak-linux.org/. Be friendly, do not top-post, and follow RFC 1855 "Netiquette". 
- If you don't I might ignore you.
Bug#803013: systemd should not destroy application created cgroups
On Fri, Nov 13, 2015 at 12:50:57PM +0100, Julian Andres Klode wrote: > On Fri, Nov 13, 2015 at 11:36:30AM +1100, paul.sz...@sydney.edu.au wrote: > > Progress? For my efforts upstream, I got the comment: > > > > > Sorry, but systemd implements a single-writer cgroup logic (as > > > requested by the kernel maintainers), and hence takes possesion of the > > > whole tree. ... > > > > I observe it only uses the /sys/fs/cgroup/systemd tree. > > (I wonder about the "req by kernel" comment.) > > See the end of the email. Also: http://thread.gmane.org/gmane.linux.kernel.cgroups/6638 > > > > > > ... If you want your own cgroup tree to manage, use the "Delegate=yes" > > > feature in a service or scope, but otherwise systemd is in exclusive > > > control. > > > > Do we have that? Can we have it everywhere? Can we have it by default, > > should not it be so? > > No. You set Delegate=yes for the unit which manages its own cgroups > hierarchy beneath the one designated by systemd. This is also only a mid-term workaround, and will be dropped longer term, AFAICT from: https://lwn.net/Articles/556112/ Because the kernel maintainer *really* wants a single writer. -- Julian Andres Klode - Debian Developer, Ubuntu Member See http://wiki.debian.org/JulianAndresKlode and http://jak-linux.org/. Be friendly, do not top-post, and follow RFC 1855 "Netiquette". - If you don't I might ignore you.
Bug#803013: systemd should not destroy application created cgroups
Control: severity -1 important Hi Am 12.11.2015 um 22:21 schrieb paul.sz...@sydney.edu.au: > severity 803013 critical > tag 803013 - moreinfo unreproducible + confirmed > thanks > > Dear Michael, > > You did not reply for a week, so I am trying to set tags myself. > > Also, while doing this, am trying to set severity back to "critical": > this bug does break unrelated software. Nah, the severity was fine, especially since it doesn't happen in a default setup. I would suggest that you raise this upstream at https://github.com/systemd/issues/new, as it is not really Debian specific. Regards, Michael -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
severity 803013 critical tag 803013 - moreinfo unreproducible + confirmed thanks Dear Michael, You did not reply for a week, so I am trying to set tags myself. Also, while doing this, am trying to set severity back to "critical": this bug does break unrelated software. --- For the record: the following steps will reproduce the issue, on a freshly-installed jessie machine: - Run command dpkg-reconfigure libpam-runtime and de-select the [ ] Register user sessions in the systemd control group hierarchy option; then reboot. - Log in to the machine; probably not via GDM3 as that might not work at all; not via getty as then the issue will not show(?!!); but log in via XDM, or via telnetd or sshd. - Become root (log in as such, or use su). - As root, do commands: # Set things up mkdir /sys/fs/cgroup/cpu/mytest echo $$ > /sys/fs/cgroup/cpu/mytest/tasks # Check it is there grep . /sys/fs/cgroup/cpu/mytest/tasks # Do the systemd thing systemctl daemon-reload systemctl start anacron # See it gone grep . /sys/fs/cgroup/cpu/mytest/tasks Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Am 12.11.2015 um 22:29 schrieb Michael Biebl: > I would suggest that you raise this upstream at > https://github.com/systemd/issues/new, as it is not really Debian specific. Sorry, wrong URL: https://github.com/systemd/systemd/issues/new -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Progress? For my efforts upstream, I got the comment: > Sorry, but systemd implements a single-writer cgroup logic (as > requested by the kernel maintainers), and hence takes possesion of the > whole tree. ... I observe it only uses the /sys/fs/cgroup/systemd tree. (I wonder about the "req by kernel" comment.) > ... If you want your own cgroup tree to manage, use the "Delegate=yes" > feature in a service or scope, but otherwise systemd is in exclusive > control. Do we have that? Can we have it everywhere? Can we have it by default, should not it be so? > Sorry, but multiple access to the cgroup tree is simply not supported. Not if we let systemd take over the world. --- Sorry, I do not think I am willing to fight the war upstream. (Knowing full well that then maybe Linux will turn to mush, and that to escape this dictatorship we will all seek shelter under the MS umbrella.) Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, > I would suggest that you raise this upstream ... Done, see: https://github.com/systemd/systemd/issues/1872 Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of Sydney Australia
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, This bug is easily reproducible in a permitted and supported (if somewhat non-default) configuration: please remove the moreinfo unreproducible tags. I wonder whether the patch suggested here also solves the issue in http://bugs.debian.org/777601 and the in-progress fix in file debian/patches/core-don-t-migrate-PIDs-for-units-that-may-contain-s.patch Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of Sydney Australia
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, >> Running >> dpkg-reconfigure libpam-runtime >> asks me nicely ... >> There is no indication that I should, or must, select "do systemd". > > The default should be, that all those 3 are selected. So definitely > something odd. Am I allowed to choose other than default? Are such choices permitted and supported configuration? Thanks, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Am 2015-11-07 04:22, schrieb paul.sz...@sydney.edu.au: Dear Michael, I wonder how that line came to be missed on my machines ... If you restore the previous state and you ran dpkg-reconfigure libpam-runtime, what do you get? Running dpkg-reconfigure libpam-runtime asks me nicely: PAM configuration Pluggable Authentication Modules (PAM) determine how authentication, authorization, and password changing are handled on the system, as well as allowing configuration of additional actions to take when starting user sessions. Some PAM module packages provide profiles that can be used to automatically adjust the behavior of all PAM-using applications on the system. Please indicate which of these behaviors you wish to enable. PAM profiles to enable: [*] Unix authentication [ ] Register user sessions in the systemd control group hierarchy [ ] GNOME Keyring Daemon - Login keyring management [ ] Inheritable Capabilities Management There is no indication that I should, or must, select "do systemd". The default should be, that all those 3 are selected. So definitely something odd.
Bug#803013: systemd should not destroy application created cgroups
Hi Paul Am 06.11.2015 um 01:00 schrieb paul.sz...@sydney.edu.au: > Dear Michael, > >>> I wonder how that line came to be missed on my machines: I upgraded from >>> wheezy (which was upgraded from previous releases). >> >> If that line was not automatically added it probably means you had made >> custom modifications to the file in the past. > > Possible, but unlikely: the only difference between my > /etc/pam.d/common-session > file and that from the freshly installed jessie, is the > session optional pam_systemd.so > line. Well, apparently pam-auth-update was convinced you had local modifications. If you restore the previous state and you ran dpkg-reconfigure libpam-runtime, what do you get? >> Not having libpam-systemd installed probably means, that your user >> processes are not properly added to the correct cgroups. > > I do have libpam-systemd installed (though not "active" because of my > "broken" common-session file). > > With my "broken" /etc/pam.d/common-session file, systemd did not create > /sys/fs/cgroup/systemd/user.slice/user-N.slice/ directories. Why should > the lack of those interfere with my use of cgroups? If the PAM setting > is so important, should not it be set to required? > > There is also a file > /etc/pam.d/common-session-noninteractive > that does not contain the pam_systemd.so line, used for cron and sudo > (maybe others): can cgroups be used for or from those? I'm not sure what to do about this bug report. I'm inclined to close it, since it doesn't look like something which we can address in systemd itself. Cheers, Michael -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, I wonder how that line came to be missed on my machines ... > If you restore the previous state and you ran > dpkg-reconfigure libpam-runtime, what do you get? Running dpkg-reconfigure libpam-runtime asks me nicely: PAM configuration Pluggable Authentication Modules (PAM) determine how authentication, authorization, and password changing are handled on the system, as well as allowing configuration of additional actions to take when starting user sessions. Some PAM module packages provide profiles that can be used to automatically adjust the behavior of all PAM-using applications on the system. Please indicate which of these behaviors you wish to enable. PAM profiles to enable: [*] Unix authentication [ ] Register user sessions in the systemd control group hierarchy [ ] GNOME Keyring Daemon - Login keyring management [ ] Inheritable Capabilities Management There is no indication that I should, or must, select "do systemd". >> There is also a file >> /etc/pam.d/common-session-noninteractive >> that does not contain the pam_systemd.so line, used for cron and sudo >> (maybe others): can cgroups be used for or from those? I wonder. > I'm not sure what to do about this bug report. I'm inclined to close > it, since it doesn't look like something which we can address in > systemd itself. I believe my patch would make systemd more robust, that may help to prevent future recurrences of this bug. Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Am 05.11.2015 um 05:11 schrieb paul.sz...@sydney.edu.au: > I wonder how that line came to be missed on my machines: I upgraded from > wheezy (which was upgraded from previous releases). If that line was not automatically added it probably means you had made custom modifications to the file in the past. The pam_systemd.so entry is added by pam-auth-update upon installation of libpam-systemd. The tool preserves any custom modifications though. Not having libpam-systemd installed probably means, that your user processes are not properly added to the correct cgroups. Michael -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, >> I wonder how that line came to be missed on my machines: I upgraded from >> wheezy (which was upgraded from previous releases). > > If that line was not automatically added it probably means you had made > custom modifications to the file in the past. Possible, but unlikely: the only difference between my /etc/pam.d/common-session file and that from the freshly installed jessie, is the session optional pam_systemd.so line. > Not having libpam-systemd installed probably means, that your user > processes are not properly added to the correct cgroups. I do have libpam-systemd installed (though not "active" because of my "broken" common-session file). With my "broken" /etc/pam.d/common-session file, systemd did not create /sys/fs/cgroup/systemd/user.slice/user-N.slice/ directories. Why should the lack of those interfere with my use of cgroups? If the PAM setting is so important, should not it be set to required? There is also a file /etc/pam.d/common-session-noninteractive that does not contain the pam_systemd.so line, used for cron and sudo (maybe others): can cgroups be used for or from those? Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, > I'm not able to reproduce the issue either. > ... this looks like something specific to your system configuration, a > default jessie system doesn't seem to be affected. I found what causes reproducibility: editing the file /etc/pam.d/common-session and removing or commenting out the optional(?!!) line session optional pam_systemd.so makes the issue reproducible. - Curiously, removing that line also causes GDM3 to become unusable (and I was thinking it was just too buggy to be used... which it is, really). I wonder how that line came to be missed on my machines: I upgraded from wheezy (which was upgraded from previous releases). So, to reproduce the issue, on a "plain jessie" machine (e.g. freshly installed from debian-8.2.0-amd64-netinst.iso): - Edit the file /etc/pam.d/common-session and remove or comment out the line session optional pam_systemd.so then reboot. - Log in to the machine; probably not via GDM3 as that might not work at all; not via getty as then the issue will not show(?!!); but log in via XDM, or via telnetd or sshd. (Wonder if sshd login with keys would bypass PAM and then not need the editing above.) - Become root (log in as such, or use su). - As root, do commands: # Set things up mkdir /sys/fs/cgroup/cpu/mytest echo $$ > /sys/fs/cgroup/cpu/mytest/tasks # Check it is there grep . /sys/fs/cgroup/cpu/mytest/tasks # Do the systemd thing systemctl daemon-reload systemctl start anacron # See it gone grep . /sys/fs/cgroup/cpu/mytest/tasks Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
paul.sz...@sydney.edu.au [2015-10-29 21:46 +1100]: > # Set things up > mkdir /sys/fs/cgroup/cpu/mytest > echo $$ > /sys/fs/cgroup/cpu/mytest/tasks > # Check it is there > grep . /sys/fs/cgroup/cpu/mytest/tasks > # Do the systemd thing > systemctl daemon-reload > systemctl start anacron > # See it gone > grep . /sys/fs/cgroup/cpu/mytest/tasks FTR, I was testing this in a jessie VM now, and I can't reproduce the cgroup changes there either. So this is more subtle to reproduce. Martin -- Martin Pitt| http://www.piware.de Ubuntu Developer (www.ubuntu.com) | Debian Developer (www.debian.org)
Bug#803013: systemd should not destroy application created cgroups
Dear Martin, >> # Set things up >> mkdir /sys/fs/cgroup/cpu/mytest >> echo $$ > /sys/fs/cgroup/cpu/mytest/tasks >> # Check it is there >> grep . /sys/fs/cgroup/cpu/mytest/tasks >> # Do the systemd thing >> systemctl daemon-reload >> systemctl start anacron >> # See it gone >> grep . /sys/fs/cgroup/cpu/mytest/tasks > > FTR, I was testing this in a jessie VM now, and I can't reproduce the > cgroup changes there either. So this is more subtle to reproduce. Thanks for testing. However, the test "works" for me on jessie: root@p639:~# COLUMNS=120 dpkg -l | grep systemd ii libpam-systemd:amd64 215-17+deb8u2 amd64 system and service manager - PAM module ii libsystemd-daemon0:amd64 215-17+deb8u2 amd64 systemd utility library (deprecated) ii libsystemd-login0:amd64 215-17+deb8u2 amd64 systemd login utility library (deprecated) ii libsystemd0:amd64215-17+deb8u2 amd64 systemd utility library ii systemd 215-17+deb8u2 amd64 system and service manager ii systemd-sysv 215-17+deb8u2 amd64 system and service manager - SysV links root@p639:~# mkdir /sys/fs/cgroup/cpu/mytest mkdir: cannot create directory ?/sys/fs/cgroup/cpu/mytest?: File exists root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks root@p639:~# echo $$ > /sys/fs/cgroup/cpu/mytest/tasks root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks 7198 root@p639:~# systemctl daemon-reload root@p639:~# systemctl start anacron root@p639:~# grep . 
/sys/fs/cgroup/cpu/mytest/tasks root@p639:~# and the issue is fixed with my patch: root@p639:~# COLUMNS=120 dpkg -l | grep systemd ii libpam-systemd:amd64 215-17+deb8u2.psz amd64 system and service manager - PAM module ii libsystemd-daemon0:amd64 215-17+deb8u2 amd64 systemd utility library (deprecated) ii libsystemd-login0:amd64 215-17+deb8u2 amd64 systemd login utility library (deprecated) ii libsystemd0:amd64215-17+deb8u2.psz amd64 systemd utility library ii systemd 215-17+deb8u2.psz amd64 system and service manager ii systemd-sysv 215-17+deb8u2.psz amd64 system and service manager - SysV links root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks root@p639:~# echo $$ > /sys/fs/cgroup/cpu/mytest/tasks root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks 7198 root@p639:~# systemctl daemon-reload root@p639:~# systemctl start anacron root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks 7198 root@p639:~# I wonder what is different between your jessie machine and mine. Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Dear Martin, [Sorry I should have added...] > This actually sounds similar to https://bugs.debian.org/777601, but > this already got fixed in 215-12, thus in Jessie. ... and 777601 sounds like https://bugzilla.redhat.com/show_bug.cgi?id=1139223 but then https://bugzilla.redhat.com/show_bug.cgi?id=1202859 is un-solved. I also wonder: why would, how could, cg_migrate() be called with an empty pto string? Is the bug deeper? Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, >>> Please test if this issue is still reproducible with 227 ... >> Sorry ... not easily enough ... > Or create a throw-away VM using sid. That's what I would use. Could do; or do on a throw-away partition; either way it takes hours, not minutes. I presume you have an "unstable" machine handy. Then you can test in seconds (as root): # Set things up mkdir /sys/fs/cgroup/cpu/mytest echo $$ > /sys/fs/cgroup/cpu/mytest/tasks # Check it is there grep . /sys/fs/cgroup/cpu/mytest/tasks # Do the systemd thing systemctl daemon-reload systemctl start anacron # See it gone grep . /sys/fs/cgroup/cpu/mytest/tasks Please let me know... Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
paul.sz...@sydney.edu.au [2015-10-29 21:46 +1100]: > Could do; or do on a throw-away partition; either way it takes hours, > not minutes. I presume you have an "unstable" machine handy. Then you > can test in seconds (as root): > > # Set things up > mkdir /sys/fs/cgroup/cpu/mytest > echo $$ > /sys/fs/cgroup/cpu/mytest/tasks > # Check it is there > grep . /sys/fs/cgroup/cpu/mytest/tasks > # Do the systemd thing > systemctl daemon-reload > systemctl start anacron > # See it gone > grep . /sys/fs/cgroup/cpu/mytest/tasks I tested this under 225 and 227, and in both cases the bash process stayed in that cgroup. I don't currently have a jessie VM around to confirm that I can actually reproduce the issue on 215; but I think Michael does. This actually sounds similar to https://bugs.debian.org/777601, but this already got fixed in 215-12, thus in Jessie. Martin -- Martin Pitt| http://www.piware.de Ubuntu Developer (www.ubuntu.com) | Debian Developer (www.debian.org)
Bug#803013: systemd should not destroy application created cgroups
Control: tags -1 moreinfo unreproducible Am 29.10.2015 um 20:16 schrieb paul.sz...@sydney.edu.au: >>> # Set things up >>> mkdir /sys/fs/cgroup/cpu/mytest >>> echo $$ > /sys/fs/cgroup/cpu/mytest/tasks >>> # Check it is there >>> grep . /sys/fs/cgroup/cpu/mytest/tasks >>> # Do the systemd thing >>> systemctl daemon-reload >>> systemctl start anacron >>> # See it gone >>> grep . /sys/fs/cgroup/cpu/mytest/tasks >> > root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks > root@p639:~# echo $$ > /sys/fs/cgroup/cpu/mytest/tasks > root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks > 7198 > root@p639:~# systemctl daemon-reload > root@p639:~# systemctl start anacron > root@p639:~# grep . /sys/fs/cgroup/cpu/mytest/tasks > 7198 > root@p639:~# > > I wonder what is different between your jessie machine and mine. Fwiw, I followed the exact same steps and have the same result as Martin. I'm not able to reproduce the issue either. Paul, this looks like something specific to your system configuration, a default jessie system doesn't seem to be affected. You mentioned you have tools like cgrulesengd etc running. Maybe they are interfering. Would be great if you can reduce your problem to a minimal test-case based on a default jessie installation. Thanks, Michael -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Dear Michael, > I couldn't quite follow what you try to achieve here. I.e. which > processes or services you want to confine and why. And what exactly > you did. > Would be great if you can be a bit more verbose on that. I am using cgrulesengd from package cgroup-tools to group each user's processes together: each user in a separate group, most with cpu.shares default 1024, some users with higher shares. The intent is "fair" sharing of CPU resources on compute servers. - Though, I do not think this is "relevant" here: any use of cgroups (outside of systemd) would be affected. > Please test if this issue is still reproducible with 227 from unstable > and if so, file the issue at https://github.com/systemd/systemd/issues. Sorry I do not think I can do that, not easily enough: I do not think I can install 227 on jessie, can I? (Of course I could upgrade one box to unstable and try that way...) Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia
Bug#803013: systemd should not destroy application created cgroups
Am 29.10.2015 um 02:46 schrieb paul.sz...@sydney.edu.au: >> Please test if this issue is still reproducible with 227 from unstable >> and if so, file the issue at https://github.com/systemd/systemd/issues. > > Sorry I do not think I can do that, not easily enough: I do not think I > can install 227 on jessie, can I? (Of course I could upgrade one box to > unstable and try that way...) Or create a throw-away VM using sid. That's what I would use. -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Control: severity -1 important Hi Paul, thanks for your bug report. Am 26.10.2015 um 03:12 schrieb Paul Szabo: > Package: systemd > Version: 215-17+deb8u2 > Severity: critical > Tags: patch > Justification: breaks unrelated software > > If you use cgroups, then systemd will on occasions destroy your > settings. To reproduce: > - Set up cgroups e.g. adding TaskIDs to /sys/fs/cgroup/cpu/DIR/tasks >files. (I use cgrulesengd from package cgroup-tools, but any other >use of cgroups is equally affected.) > - Then when you use systemd commands: > systemctl daemon-reload > systemctl start anacron >you will see your cgroups (your tasks files) becoming empty. >Command daemon-reload seems to happen within "apt-get dist-upgrade" >sequences, and "start anacron" happens nightly. (Some other systemd >commands may also affect.) I couldn't quite follow what you try to achieve here. I.e. which processes or services you want to confine and why. And what exactly you did. Would be great if you can be a bit more verbose on that. > I propose the attached patch to avoid the issue. This patch seems to work > well for me. Please test if this issue is still reproducible with 227 from unstable and if so, file the issue at https://github.com/systemd/systemd/issues. As for your patch, it's probably best if you create a pull request upstream. Thanks, Michael -- Why is it that all of the instruments seeking intelligent life in the universe are pointed away from Earth? signature.asc Description: OpenPGP digital signature
Bug#803013: systemd should not destroy application created cgroups
Package: systemd Version: 215-17+deb8u2 Severity: critical Tags: patch Justification: breaks unrelated software If you use cgroups, then systemd will on occasions destroy your settings. To reproduce: - Set up cgroups e.g. adding TaskIDs to /sys/fs/cgroup/cpu/DIR/tasks files. (I use cgrulesengd from package cgroup-tools, but any other use of cgroups is equally affected.) - Then when you use systemd commands: systemctl daemon-reload systemctl start anacron you will see your cgroups (your tasks files) becoming empty. Command daemon-reload seems to happen within "apt-get dist-upgrade" sequences, and "start anacron" happens nightly. (Some other systemd commands may also affect.) I propose the attached patch to avoid the issue. This patch seems to work well for me. Cheers, Paul Paul Szabo p...@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of SydneyAustralia -- Package-specific info: -- System Information: Debian Release: 8.2 APT prefers stable APT policy: (500, 'stable') Architecture: i386 (x86_64) Kernel: Linux 3.16.7-ckt11-pk07.12-amd64 (SMP w/8 CPU cores) Locale: LANG=C.UTF-8, LC_CTYPE=C.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/bash Init: systemd (via /run/systemd/system) Versions of packages systemd depends on: ii acl 2.2.52-2 ii adduser 3.113+nmu3 ii initscripts 2.88dsf-59 ii libacl1 2.2.52-2 ii libaudit1 1:2.4-1+b1 ii libblkid1 2.25.2-6 ii libc6 2.19-18+deb8u1 ii libcap2 1:2.24-8 ii libcap2-bin 1:2.24-8 ii libcryptsetup4 2:1.6.6-5 ii libgcrypt20 1.6.3-2 ii libkmod218-3 ii liblzma55.1.1alpha+20120614-2+b3 ii libpam0g1.1.8-3.1 ii libselinux1 2.3-2 ii libsystemd0 215-17+deb8u2 ii mount 2.25.2-6 ii sysv-rc 2.88dsf-59 ii udev215-17+deb8u2 ii util-linux 2.25.2-6 Versions of packages systemd recommends: ii dbus1.8.20-0+deb8u1 ii libpam-systemd 215-17+deb8u2 Versions of packages systemd suggests: pn systemd-ui -- no debconf information diff -r -U12 a/src/shared/cgroup-util.c b/src/shared/cgroup-util.c --- 
a/src/shared/cgroup-util.c 2015-10-25 07:16:24.0 +1100 +++ b/src/shared/cgroup-util.c 2015-10-26 06:03:25.0 +1100 @@ -281,24 +281,34 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) { bool done = false; _cleanup_set_free_ Set *s = NULL; int r, ret = 0; pid_t my_pid; assert(cfrom); assert(pfrom); assert(cto); assert(pto); +/* + * PSz 25 Oct 2015 + * An empty "to" path is surely wrong (do not annoy cgroups that not ours) + */ +if (!strlen(pto)) { +/* log_warning("Debug: cg_migrate skip from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ +return ret; +} +/* log_warning("Debug: cg_migrate do from (%s)%s to (%s)%s", cfrom, pfrom, cto, pto); */ + s = set_new(trivial_hash_func, trivial_compare_func); if (!s) return -ENOMEM; my_pid = getpid(); do { _cleanup_fclose_ FILE *f = NULL; pid_t pid = 0; done = true; r = cg_enumerate_processes(cfrom, pfrom, );