On Wed, Jun 05, 2019 at 02:42:00PM -0600, Tom Hromatka <tom.hroma...@oracle.com> wrote: > I created a simple double-forking test program to try and reproduce the > issue, but I was unable to get it to happen. Is your system heavily > loaded? Any recommendations on reproducing the issue? I can reproduce this when cgrulesengd is relatively slowed down; overall system load or renicing the daemon helps. Also note that my double-forking program is multithreaded.
> Otherwise I think your changes look reasonable, but I would really like > to test them out before giving my Reviewed-by. Actually, when I reran my reproducer, I noticed one more failure path for a short-lived process, hence I'm sending v2 of the patch. ## My reproducer cat >/etc/cgconfig.conf <<EOD mount { cpuset = /cgroup/cpuset; cpu = /cgroup/cpu; cpuacct = /cgroup/cpuacct; memory = /cgroup/memory; devices = /cgroup/devices; freezer = /cgroup/freezer; net_cls = /cgroup/net_cls; blkio = /cgroup/blkio; } group testgroup { cpu { cpu.cfs_period_us=100000; cpu.cfs_quota_us=50000; cpu.shares=1024; } cpuacct { cpuacct.usage=0; } memory { memory.limit_in_bytes=1073741824; } } EOD cat >/etc/cgrules.conf <<EOD root:/root/repro/sleep-server-double-mt cpu,memory,cpuacct testgroup/ EOD cat >/root/repro/sleep-server-double-mt.c <<EOD #include <unistd.h> #include <stdlib.h> #include <string.h> #include <pthread.h> void *worker_fn(void *arg) { long duration = (long) arg; if (duration > 1) duration -= 1; sleep(duration); return NULL; } int main(int argc, char *argv[]) { int n, i; int duration; if(fork()) { // parent usleep(100000); } else { // child n = (argc > 2) ? atoi(argv[2]) : 2; duration = atoi(argv[1]); pthread_t *threads = malloc(sizeof(pthread_t) * n); for (i = 0; i < n; ++i) { pthread_create(threads + i, NULL, worker_fn, (void *)duration); } sleep(duration); for (i = 0; i < n; ++i) { pthread_join(threads[i], NULL); } free(threads); } return 0; } EOD cat >/root/repro/Makefile <<EOD CFLAGS+=-pthread LDFLAGS+=-lpthread all: sleep-server-double-mt EOD cat >/root/repro/loop.sh <<EOD #!/bin/bash DAEMON=/root/repro-public/sleep-server-double-mt CGROUP=testgroup killproc $DAEMON || : counter=0 while true ; do #strace -f -tt -o strace.$counter start_daemon $DAEMON 10000 4 & start_daemon $DAEMON 10000 4 & while true ; do sleep 0.2 pid=$(pidof $DAEMON) if [ $? 
-ne 0 ] ; then # echo "fail" continue fi if [[ "$pid" =~ " " ]] ; then # echo ">$pid<" continue fi break done file=/proc/$pid/cgroup if [ $(grep -c $CGROUP $file) -lt 3 ] ; then echo "Too little cgroups (PID=$pid, strace.$counter): " echo "---" cat $file echo "---" break; fi echo -n "." killproc $DAEMON counter=$(($counter+1)) done EOD Michal Koutný (1): cgrulesengd: Do not ignore changes of short-lived processes src/daemon/cgrulesengd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) -- 2.21.0 _______________________________________________ Libcg-devel mailing list Libcg-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libcg-devel