The following pull request was submitted through GitHub.
It can be accessed and reviewed at: https://github.com/lxc/lxcfs/pull/244

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
As it is, the CPU usage stats in `/proc/stat` are shared between the host and all containers (issue #180). I'm trying to find a way for each container to see only its own CPU usage.

With this patch, `proc_stat_read()` reads per-container CPU usage from the *cpuacct* cgroup, from `cpuacct.usage_all` to be exact. However, this file contains only the time spent in `user` and `system` modes; the other fields from `/proc/stat` are not present. Idle time can be calculated, but the remaining values, such as `nice` or `iowait`, are all reported as zeroes in the resulting `/proc/stat` file. I don't think this can be solved without the kernel providing more information.

The *cpuacct* cgroup (v1) has to be present; otherwise `proc_stat_read()` will return an error. This could be changed if needed, as I don't know whether LXCFS is supposed to work where this cgroup is unavailable.

We've been using it for several weeks on a staging node with [vpsAdminOS](https://github.com/vpsfreecz/vpsadminos) and it seems to work as expected.
From de5efbb3cec16c2df3940cfe7486582794f1271a Mon Sep 17 00:00:00 2001
From: Jakub Skokan <[email protected]>
Date: Thu, 14 Jun 2018 11:51:58 +0200
Subject: [PATCH] Per-container CPU usage in /proc/stat

Containers can see the utilization of all available CPUs, even if a CPU
is utilized by other containers or by the host. The contents
of `/proc/stat` are shared across the system, except that CPUs
excluded by cpuset are hidden. This commit attempts to fix that, but at
a cost. CPU usage is read from the cpuacct cgroup, but that accounts only
for the `user` and `system` fields from `/proc/stat`. Idle time can be
calculated, but the other fields cannot, so they are always set to 0.

Signed-off-by: Jakub Skokan <[email protected]>
---
 bindings.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 136 insertions(+), 37 deletions(-)

diff --git a/bindings.c b/bindings.c
index 6ab1bdb..1ce0784 100644
--- a/bindings.c
+++ b/bindings.c
@@ -80,6 +80,11 @@ struct file_info {
        int cached;
 };
 
+struct cpuacct_usage {
+       uint64_t user;
+       uint64_t system;
+};
+
 /* The function of hash table.*/
 #define LOAD_SIZE 100 /*the size of hash_table */
 #define FLUSH_TIME 5  /*the flush rate */
@@ -3794,6 +3799,90 @@ static uint64_t get_reaper_age(pid_t pid)
        return procage;
 }
 
+/*
+ * Returns 0 on success.
+ * It is the caller's responsibility to free `return_usage`, unless this
+ * function returns an error.
+ */
+static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage 
**return_usage)
+{
+
+       int cpucount = get_nprocs();
+       struct cpuacct_usage *cpu_usage;
+       int rv = 0, i, j, ret, read_pos = 0, read_cnt;
+       int cg_cpu;
+       uint64_t cg_user, cg_system;
+       int64_t ticks_per_sec;
+       char *usage_str = NULL;
+
+       ticks_per_sec = sysconf(_SC_CLK_TCK);
+
+       if (ticks_per_sec < 0 && errno == EINVAL) {
+               lxcfs_debug(
+                       "%s\n",
+                       "read_cpuacct_usage_all failed to determine number of 
clock ticks "
+                       "in a second");
+               return -1;
+       }
+
+       cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
+       if (!cpu_usage)
+               return -ENOMEM;
+
+       if (!cgfs_get_value("cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
+               rv = -1;
+               goto err;
+       }
+
+       if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
+               lxcfs_error("read_cpuacct_usage_all reading first line from "
+                               "%s/cpuacct.usage_all failed.\n", cg);
+               rv = -1;
+               goto err;
+       }
+
+       read_pos += read_cnt;
+
+       for (i = 0, j = 0; i < cpucount; i++) {
+               ret = sscanf(usage_str + read_pos, "%d %lu %lu\n%n", &cg_cpu, 
&cg_user,
+                               &cg_system, &read_cnt);
+
+               if (ret == EOF) {
+                       break;
+
+               } else if (ret != 3) {
+                       lxcfs_error("read_cpuacct_usage_all reading from 
%s/cpuacct.usage_all "
+                                       "failed.\n", cg);
+                       rv = -1;
+                       goto err;
+               }
+
+               read_pos += read_cnt;
+
+               if (!cpu_in_cpuset(i, cpuset))
+                       continue;
+
+               /* Convert the time from nanoseconds to USER_HZ */
+               cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * 
ticks_per_sec;
+               cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * 
ticks_per_sec;
+               j++;
+       }
+
+       rv = 0;
+       *return_usage = cpu_usage;
+
+err:
+       if (usage_str)
+               free(usage_str);
+
+       if (rv != 0) {
+               free(cpu_usage);
+               *return_usage = NULL;
+       }
+
+       return rv;
+}
+
 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
 static int proc_stat_read(char *buf, size_t size, off_t offset,
                struct fuse_file_info *fi)
@@ -3806,13 +3895,13 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
        size_t linelen = 0, total_len = 0, rv = 0;
        int curcpu = -1; /* cpu numbering starts at 0 */
        unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq 
= 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
-       unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, 
iowait_sum = 0,
-                                       irq_sum = 0, softirq_sum = 0, steal_sum 
= 0, guest_sum = 0, guest_nice_sum = 0;
+       unsigned long user_sum = 0, system_sum = 0, idle_sum = 0;
        char cpuall[CPUALL_MAX_SIZE];
        /* reserve for cpu all */
        char *cache = d->buf + CPUALL_MAX_SIZE;
        size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
        FILE *f = NULL;
+       struct cpuacct_usage *cg_cpu_usage = NULL;
 
        if (offset){
                if (offset > d->size)
@@ -3837,6 +3926,12 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
        if (!cpuset)
                goto err;
 
+       /* Read cpuacct.usage_all for all CPUs */
+       if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage) < 0) {
+               lxcfs_error("%s\n", "proc_stat_read failed to read from 
cpuacct.");
+               goto err;
+       }
+
        f = fopen("/proc/stat", "r");
        if (!f)
                goto err;
@@ -3851,7 +3946,7 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
                ssize_t l;
                int cpu;
                char cpu_char[10]; /* That's a lot of cores */
-               char *c;
+               uint64_t all_used, cg_used, new_idle;
 
                if (strlen(line) == 0)
                        continue;
@@ -3880,10 +3975,36 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
                        continue;
                curcpu ++;
 
-               c = strchr(line, ' ');
-               if (!c)
+               if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+                          &user,
+                          &nice,
+                          &system,
+                          &idle,
+                          &iowait,
+                          &irq,
+                          &softirq,
+                          &steal,
+                          &guest,
+                          &guest_nice) != 10)
                        continue;
-               l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
+
+               all_used = user + nice + system + iowait + irq + softirq + 
steal + guest + guest_nice;
+               cg_used = cg_cpu_usage[curcpu].user + 
cg_cpu_usage[curcpu].system;
+
+               if (all_used >= cg_used) {
+                       new_idle = idle + (all_used - cg_used);
+
+               } else {
+                       lxcfs_error("cpu%d from %s has unexpected cpu time: %lu 
in /proc/stat, "
+                                       "%lu in cpuacct.usage_all; unable to 
determine idle time\n",
+                                       curcpu, cg, all_used, cg_used);
+                       new_idle = idle;
+               }
+
+               l = snprintf(cache, cache_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 
0\n",
+                               curcpu, cg_cpu_usage[curcpu].user, 
cg_cpu_usage[curcpu].system,
+                               new_idle);
+
                if (l < 0) {
                        perror("Error writing to cache");
                        rv = 0;
@@ -3900,43 +4021,17 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
                cache_size -= l;
                total_len += l;
 
-               if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
-                          &user,
-                          &nice,
-                          &system,
-                          &idle,
-                          &iowait,
-                          &irq,
-                          &softirq,
-                          &steal,
-                          &guest,
-                          &guest_nice) != 10)
-                       continue;
-               user_sum += user;
-               nice_sum += nice;
-               system_sum += system;
-               idle_sum += idle;
-               iowait_sum += iowait;
-               irq_sum += irq;
-               softirq_sum += softirq;
-               steal_sum += steal;
-               guest_sum += guest;
-               guest_nice_sum += guest_nice;
+               user_sum += cg_cpu_usage[curcpu].user;
+               system_sum += cg_cpu_usage[curcpu].system;
+               idle_sum += new_idle;
        }
 
        cache = d->buf;
 
-       int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu  %lu %lu %lu 
%lu %lu %lu %lu %lu %lu %lu\n",
+       int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu  %lu 0 %lu %lu 
0 0 0 0 0 0\n",
                        user_sum,
-                       nice_sum,
                        system_sum,
-                       idle_sum,
-                       iowait_sum,
-                       irq_sum,
-                       softirq_sum,
-                       steal_sum,
-                       guest_sum,
-                       guest_nice_sum);
+                       idle_sum);
        if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) {
                memcpy(cache, cpuall, cpuall_len);
                cache += cpuall_len;
@@ -3959,6 +4054,10 @@ static int proc_stat_read(char *buf, size_t size, off_t 
offset,
 err:
        if (f)
                fclose(f);
+
+       if (cg_cpu_usage)
+               free(cg_cpu_usage);
+
        free(line);
        free(cpuset);
        free(cg);
_______________________________________________
lxc-devel mailing list
[email protected]
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to