The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxcfs/pull/244
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === As it is, CPU usage stats in `/proc/stat` are shared between the host and all containers (issue #180). I'm trying to find a way for each container to see only its own CPU usage. With this patch, `proc_stat_read()` reads per-container CPU usage from the *cpuacct* cgroup, `cpuacct.usage_all` to be exact. However, this file contains only usage in `user` and `system` modes; other fields from `/proc/stat` are not present. Idle time can be calculated, but as for other values like `nice` or `iowait`, they're all reported as zeroes in the resulting `/proc/stat` file. I don't think this can be solved without the kernel providing more information. *cpuacct* cgroup v1 has to be present; otherwise `proc_stat_read()` will return an error. This could be changed if needed; I don't know whether LXCFS is supposed to work on systems without this cgroup. We've been using it for several weeks on a staging node with [vpsAdminOS](https://github.com/vpsfreecz/vpsadminos) and it seems to work as expected.
From de5efbb3cec16c2df3940cfe7486582794f1271a Mon Sep 17 00:00:00 2001 From: Jakub Skokan <[email protected]> Date: Thu, 14 Jun 2018 11:51:58 +0200 Subject: [PATCH] Per-container CPU usage in /proc/stat Containers can see utilization of all available CPUs, even if the CPU is utilized by other containers or by the host. The contents of `/proc/stat` is shared across the system, except for hiding CPUs excluded by cpuset. This commit attempts to fix that, but at a cost. CPU usage is read from cpuacct cgroup, but that accounts only for `user` and `system` fields from `/proc/stat`. Idle time can be calculated, but other fields cannot, thus are always set to 0. Signed-off-by: Jakub Skokan <[email protected]> --- bindings.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 136 insertions(+), 37 deletions(-) diff --git a/bindings.c b/bindings.c index 6ab1bdb..1ce0784 100644 --- a/bindings.c +++ b/bindings.c @@ -80,6 +80,11 @@ struct file_info { int cached; }; +struct cpuacct_usage { + uint64_t user; + uint64_t system; +}; + /* The function of hash table.*/ #define LOAD_SIZE 100 /*the size of hash_table */ #define FLUSH_TIME 5 /*the flush rate */ @@ -3794,6 +3799,90 @@ static uint64_t get_reaper_age(pid_t pid) return procage; } +/* + * Returns 0 on success. + * It is the caller's responsibility to free `return_usage`, unless this + * function returns an error. 
+ */ +static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage **return_usage) +{ + + int cpucount = get_nprocs(); + struct cpuacct_usage *cpu_usage; + int rv = 0, i, j, ret, read_pos = 0, read_cnt; + int cg_cpu; + uint64_t cg_user, cg_system; + int64_t ticks_per_sec; + char *usage_str = NULL; + + ticks_per_sec = sysconf(_SC_CLK_TCK); + + if (ticks_per_sec < 0 && errno == EINVAL) { + lxcfs_debug( + "%s\n", + "read_cpuacct_usage_all failed to determine number of clock ticks " + "in a second"); + return -1; + } + + cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount); + if (!cpu_usage) + return -ENOMEM; + + if (!cgfs_get_value("cpuacct", cg, "cpuacct.usage_all", &usage_str)) { + rv = -1; + goto err; + } + + if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) { + lxcfs_error("read_cpuacct_usage_all reading first line from " + "%s/cpuacct.usage_all failed.\n", cg); + rv = -1; + goto err; + } + + read_pos += read_cnt; + + for (i = 0, j = 0; i < cpucount; i++) { + ret = sscanf(usage_str + read_pos, "%d %lu %lu\n%n", &cg_cpu, &cg_user, + &cg_system, &read_cnt); + + if (ret == EOF) { + break; + + } else if (ret != 3) { + lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all " + "failed.\n", cg); + rv = -1; + goto err; + } + + read_pos += read_cnt; + + if (!cpu_in_cpuset(i, cpuset)) + continue; + + /* Convert the time from nanoseconds to USER_HZ */ + cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec; + cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec; + j++; + } + + rv = 0; + *return_usage = cpu_usage; + +err: + if (usage_str) + free(usage_str); + + if (rv != 0) { + free(cpu_usage); + *return_usage = NULL; + } + + return rv; +} + #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2) static int proc_stat_read(char *buf, size_t size, off_t offset, struct fuse_file_info *fi) @@ -3806,13 +3895,13 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, size_t linelen = 0, total_len 
= 0, rv = 0; int curcpu = -1; /* cpu numbering starts at 0 */ unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0; - unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0, - irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0, guest_nice_sum = 0; + unsigned long user_sum = 0, system_sum = 0, idle_sum = 0; char cpuall[CPUALL_MAX_SIZE]; /* reserve for cpu all */ char *cache = d->buf + CPUALL_MAX_SIZE; size_t cache_size = d->buflen - CPUALL_MAX_SIZE; FILE *f = NULL; + struct cpuacct_usage *cg_cpu_usage = NULL; if (offset){ if (offset > d->size) @@ -3837,6 +3926,12 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, if (!cpuset) goto err; + /* Read cpuacct.usage_all for all CPUs */ + if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage) < 0) { + lxcfs_error("%s\n", "proc_stat_read failed to read from cpuacct."); + goto err; + } + f = fopen("/proc/stat", "r"); if (!f) goto err; @@ -3851,7 +3946,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, ssize_t l; int cpu; char cpu_char[10]; /* That's a lot of cores */ - char *c; + uint64_t all_used, cg_used, new_idle; if (strlen(line) == 0) continue; @@ -3880,10 +3975,36 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, continue; curcpu ++; - c = strchr(line, ' '); - if (!c) + if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", + &user, + &nice, + &system, + &idle, + &iowait, + &irq, + &softirq, + &steal, + &guest, + &guest_nice) != 10) continue; - l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); + + all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; + cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system; + + if (all_used >= cg_used) { + new_idle = idle + (all_used - cg_used); + + } else { + lxcfs_error("cpu%d from %s has unexpected cpu time: %lu in /proc/stat, " + "%lu in cpuacct.usage_all; unable 
to determine idle time\n", + curcpu, cg, all_used, cg_used); + new_idle = idle; + } + + l = snprintf(cache, cache_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n", + curcpu, cg_cpu_usage[curcpu].user, cg_cpu_usage[curcpu].system, + new_idle); + if (l < 0) { perror("Error writing to cache"); rv = 0; @@ -3900,43 +4021,17 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, cache_size -= l; total_len += l; - if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", - &user, - &nice, - &system, - &idle, - &iowait, - &irq, - &softirq, - &steal, - &guest, - &guest_nice) != 10) - continue; - user_sum += user; - nice_sum += nice; - system_sum += system; - idle_sum += idle; - iowait_sum += iowait; - irq_sum += irq; - softirq_sum += softirq; - steal_sum += steal; - guest_sum += guest; - guest_nice_sum += guest_nice; + user_sum += cg_cpu_usage[curcpu].user; + system_sum += cg_cpu_usage[curcpu].system; + idle_sum += new_idle; } cache = d->buf; - int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu 0 %lu %lu 0 0 0 0 0 0\n", user_sum, - nice_sum, system_sum, - idle_sum, - iowait_sum, - irq_sum, - softirq_sum, - steal_sum, - guest_sum, - guest_nice_sum); + idle_sum); if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) { memcpy(cache, cpuall, cpuall_len); cache += cpuall_len; @@ -3959,6 +4054,10 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, err: if (f) fclose(f); + + if (cg_cpu_usage) + free(cg_cpu_usage); + free(line); free(cpuset); free(cg);
_______________________________________________ lxc-devel mailing list [email protected] http://lists.linuxcontainers.org/listinfo/lxc-devel
