On Wed, Jun 05, 2019 at 02:42:00PM -0600, Tom Hromatka 
<tom.hroma...@oracle.com> wrote:
> I created a simple double-forking test program to try and reproduce the
> issue, but I was unable to get it to happen.  Is your system heavily
> loaded?  Any recommendations on reproducing the issue?
I can reproduce this when cgrulesengd is relatively slowed down; high overall
load or renice helps with that. Also, the double-forking program is multithreaded.

> Otherwise I think your changes look reasonable, but I would really like
> to test them out before giving my Reviewed-by.
Actually, when I reran my reproducer, I noticed one more failure path for
a short-lived process, hence I'm sending v2 of the patch.

## My reproducer

cat >/etc/cgconfig.conf <<EOD
mount {
                cpuset  = /cgroup/cpuset;
                cpu     = /cgroup/cpu;
                cpuacct = /cgroup/cpuacct;
                memory  = /cgroup/memory;
                devices = /cgroup/devices;
                freezer = /cgroup/freezer;
                net_cls = /cgroup/net_cls;
                blkio   = /cgroup/blkio;
}

group testgroup {
cpu {
                cpu.cfs_period_us=100000;
                cpu.cfs_quota_us=50000;
                cpu.shares=1024;
}
cpuacct {
                cpuacct.usage=0;
}
memory {
                memory.limit_in_bytes=1073741824;
}
}
EOD

cat >/etc/cgrules.conf <<EOD
root:/root/repro/sleep-server-double-mt cpu,memory,cpuacct testgroup/
EOD

cat >/root/repro/sleep-server-double-mt.c <<EOD
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

/*
 * Thread start routine: sleeps and then finishes.
 *
 * arg carries the sleep time in seconds, stored as an integer inside the
 * pointer value. Values greater than 1 are shortened by one second;
 * smaller values are used unchanged. Always returns NULL.
 */
void *worker_fn(void *arg) {
        long secs = (long) arg;

        sleep(secs > 1 ? secs - 1 : secs);
        return NULL;
}

/*
 * Reproducer entry point: forks once. The parent waits 100 ms and exits;
 * the child starts worker threads, sleeps, then joins them.
 *
 * argv[1]: sleep duration in seconds (required).
 * argv[2]: number of worker threads (optional, defaults to 2).
 *
 * Returns 0 on success, EXIT_FAILURE when the duration argument is missing
 * or the thread array cannot be allocated.
 */
int main(int argc, char *argv[]) {
        int n, i;
        int duration;
        pthread_t *threads;

        /* argv[1] is mandatory; without it atoi() would be handed NULL. */
        if (argc < 2)
                return EXIT_FAILURE;

        if(fork()) { // parent
                usleep(100000);
        } else { // child
                n = (argc > 2) ? atoi(argv[2]) : 2;
                duration = atoi(argv[1]);
                if (n < 0)
                        n = 0;

                threads = malloc(sizeof(*threads) * n);
                if (threads == NULL && n > 0)
                        return EXIT_FAILURE;

                for (i = 0; i < n; ++i) {
                        /*
                         * Widen to long before the pointer cast: worker_fn()
                         * converts the argument back with a (long) cast.
                         */
                        if (pthread_create(threads + i, NULL, worker_fn,
                                           (void *)(long)duration) != 0)
                                break;
                }
                /* Join only the threads that were actually started. */
                n = i;

                sleep(duration);

                for (i = 0; i < n; ++i) {
                        pthread_join(threads[i], NULL);
                }
                free(threads);
        }
        return 0;
}
EOD

cat >/root/repro/Makefile <<EOD
CFLAGS+=-pthread
LDFLAGS+=-lpthread

all: sleep-server-double-mt
EOD

cat >/root/repro/loop.sh <<'EOD'
#!/bin/bash
#
# Repeatedly starts the test daemon and checks that /proc/PID/cgroup
# mentions the target group at least three times (the rule in
# cgrules.conf lists three controllers). Stops at the first iteration
# where fewer matches are found.
#
# The heredoc delimiter above is quoted so that this script is written
# verbatim instead of being expanded by the shell that creates it.

# Same path as in /etc/cgrules.conf and as the binary built in /root/repro.
DAEMON=/root/repro/sleep-server-double-mt
CGROUP=testgroup

killproc "$DAEMON" || :
counter=0
while true ; do
        #strace -f -tt -o strace.$counter start_daemon $DAEMON 10000 4 &
        start_daemon "$DAEMON" 10000 4 &
        # Poll until pidof reports exactly one PID for the daemon.
        while true ; do
                sleep 0.2
                pid=$(pidof "$DAEMON")
                if [ $? -ne 0 ] ; then
                        # echo "fail"
                        continue
                fi
                # A space means several PIDs were returned; keep waiting.
                if [[ "$pid" =~ " " ]] ; then
                        # echo ">$pid<"
                        continue
                fi
                break
        done
        file="/proc/$pid/cgroup"
        if [ "$(grep -c "$CGROUP" "$file")" -lt 3 ] ; then
                echo "Too little cgroups (PID=$pid, strace.$counter): "
                echo "---"
                cat "$file"
                echo "---"
                break;
        fi
        echo -n "."
        killproc "$DAEMON"
        counter=$(($counter+1))
done
EOD

Michal Koutný (1):
  cgrulesengd: Do not ignore changes of short-lived processes

 src/daemon/cgrulesengd.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

-- 
2.21.0



_______________________________________________
Libcg-devel mailing list
Libcg-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libcg-devel

Reply via email to