From: Tvrtko Ursulin <[email protected]>
Signed-off-by: Tvrtko Ursulin <[email protected]>
---
tools/.gitignore | 1 +
tools/Makefile.am | 2 +
tools/Makefile.sources | 1 +
tools/intel_gpu_top.c | 593 +++++++++++++++++++++++++++++++++++++++++++++++++
tools/meson.build | 1 +
5 files changed, 598 insertions(+)
create mode 100644 tools/intel_gpu_top.c
diff --git a/tools/.gitignore b/tools/.gitignore
index 19a1f7cb8e50..6e3042810176 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -17,6 +17,7 @@ intel_framebuffer_dump
intel_gem_info
intel_gpu_frequency
intel_gpu_time
+intel_gpu_top
intel_legacy_top
intel_gtt
intel_guc_logger
diff --git a/tools/Makefile.am b/tools/Makefile.am
index dcf282eaff4e..8f6c15791a3b 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -30,6 +30,8 @@ intel_aubdump_la_LDFLAGS = -module -avoid-version -no-undefined
intel_aubdump_la_SOURCES = aubdump.c
intel_aubdump_la_LIBADD = $(top_builddir)/lib/libintel_tools.la -ldl
+intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la -lm
+
bin_SCRIPTS = intel_aubdump
CLEANFILES = $(bin_SCRIPTS)
diff --git a/tools/Makefile.sources b/tools/Makefile.sources
index 9699b7d2f737..6f8668bd4d56 100644
--- a/tools/Makefile.sources
+++ b/tools/Makefile.sources
@@ -17,6 +17,7 @@ tools_prog_lists = \
intel_gpu_frequency \
intel_firmware_decode \
intel_gpu_time \
+ intel_gpu_top \
intel_legacy_top \
intel_gtt \
intel_guc_logger \
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
new file mode 100644
index 000000000000..59a112240092
--- /dev/null
+++ b/tools/intel_gpu_top.c
@@ -0,0 +1,593 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <math.h>
+
+#include "igt_perf.h"
+
+/* Two consecutive raw readings of one counter; pmu_calc() works on cur - prev. */
+struct pmu_pair {
+ /* Most recent reading. */
+ uint64_t cur;
+ /* Reading that was current before the latest update. */
+ uint64_t prev;
+};
+
+/* One perf counter: what to open and where its value lands in a grouped read. */
+struct pmu_counter {
+ /* Event config value handed to the perf open helper. */
+ uint64_t config;
+ /* Slot of this counter in the value array filled by pmu_read_multi(). */
+ unsigned int idx;
+ /* Current and previous raw readings. */
+ struct pmu_pair val;
+};
+
+/* Number of load-average windows tracked (see load_period[] in main()). */
+#define NUM_LOADS (3)
+
+/* Per-engine state: name, derived figures and the six sampled counters. */
+struct engine {
+ /* Event-name prefix: the sysfs event file name with "-busy" removed. */
+ const char *name;
+ /* Scaled rates of the queued, runnable and running counters, in that order. */
+ double qd[3];
+ /* Smoothed (runnable + running) value, one entry per load window. */
+ double load_avg[NUM_LOADS];
+ /*
+  * Counters whose configs are looked up from the "<name>-<counter>" sysfs
+  * event files; opened in pmu_init() and refreshed in pmu_sample().
+  */
+ struct pmu_counter busy;
+ struct pmu_counter wait;
+ struct pmu_counter sema;
+ struct pmu_counter queued;
+ struct pmu_counter runnable;
+ struct pmu_counter running;
+};
+
+/* Global sampling state followed in memory by the per-engine array. */
+struct engines {
+ /* Number of engines found by discover_engines(). */
+ unsigned int num_engines;
+ /* Number of counters successfully opened through __open_pmu(). */
+ unsigned int num_counters;
+ /* Open handle on the sysfs events directory, reused for config lookups. */
+ DIR *root;
+ /* First fd opened by __open_pmu(); passed as the group for later opens. */
+ int fd;
+ /* Value returned by pmu_read_multi() for the current and previous sample. */
+ struct pmu_pair ts;
+
+ /* fd of the separately opened energy-gpu event. */
+ int rapl_fd;
+ /* Contents of energy-gpu.scale, multiplied by 1e3 in pmu_init(). */
+ double rapl_scale;
+
+ struct pmu_counter freq_req;
+ struct pmu_counter freq_act;
+ struct pmu_counter irq;
+ struct pmu_counter rc6;
+ struct pmu_counter rapl;
+
+ /* Contents of rcs0-queued.scale, applied to all queue-depth counters. */
+ double qd_scale;
+
+ /* Per-window decay factor, exp(-period / load_period[i]). */
+ double load_exp[NUM_LOADS];
+ /* Smoothed sum of (runnable + running) over all engines. */
+ double load_avg[NUM_LOADS];
+
+ /*
+  * First engine. Further engines are stored directly behind it in the same
+  * allocation and are reached through engine_ptr().
+  */
+ struct engine engine;
+};
+
+/*
+ * Look up the perf config value of the event "<name>-<counter>".
+ *
+ * The event file is opened relative to @dirfd and its contents are parsed
+ * starting at the first '0' character (the file is expected to contain a
+ * "0x..." number somewhere), with the base auto-detected by strtoull().
+ *
+ * Returns the parsed value, or (uint64_t)-1 if the name does not fit the
+ * local buffer, the file cannot be opened or read, or no '0' is found.
+ */
+static uint64_t
+get_pmu_config(int dirfd, const char *name, const char *counter)
+{
+	char buf[128], *p;
+	ssize_t len;
+	int fd, ret;
+
+	/* snprintf() returns the untruncated length, so >= means truncation. */
+	ret = snprintf(buf, sizeof(buf), "%s-%s", name, counter);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -1;
+
+	fd = openat(dirfd, buf, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	/* Leave room for the terminator; read() does not add one. */
+	len = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (len <= 0)
+		return -1;
+	buf[len] = '\0';
+
+	p = strchr(buf, '0');
+	if (!p)
+		return -1;
+
+	/* strtoull() so that a full 64-bit config survives on 32-bit builds. */
+	return strtoull(p, NULL, 0);
+}
+
+/*
+ * Address of the n-th engine. Engines are laid out back to back starting at
+ * the 'engine' member of struct engines, inside one over-sized allocation.
+ */
+#define engine_ptr(engines, n) \
+	((struct engine *)((unsigned char *)(&(engines)->engine) + \
+			   (n) * sizeof(struct engine)))
+
+/*
+ * Enumerate engines by scanning the i915 sysfs events directory for regular
+ * files named "<engine>-busy".
+ *
+ * For every match the engine name is recorded and the config of its busy
+ * counter is looked up. The returned object is heap allocated with the
+ * engine array stored directly behind it; the directory handle is kept open
+ * in ->root for later lookups.
+ *
+ * Returns NULL on any failure, after releasing everything acquired so far.
+ */
+static struct engines *discover_engines(void)
+{
+	const char *sysfs_root = "/sys/devices/i915/events";
+	const char *endswith = "-busy";
+	const size_t endlen = strlen(endswith);
+	struct engines *engines;
+	struct dirent *dent;
+	unsigned int i;
+	DIR *d;
+
+	engines = calloc(1, sizeof(*engines));
+	if (!engines)
+		return NULL;
+
+	d = opendir(sysfs_root);
+	if (!d)
+		goto err_free;
+
+	while ((dent = readdir(d)) != NULL) {
+		/* The allocation always has room for one more engine. */
+		struct engine *engine =
+			engine_ptr(engines, engines->num_engines);
+		struct engines *grown;
+		size_t len;
+
+		if (dent->d_type != DT_REG)
+			continue;
+
+		/* xxxN-busy */
+		len = strlen(dent->d_name);
+		if (len < endlen + 4)
+			continue;
+		if (strcmp(dent->d_name + len - endlen, endswith))
+			continue;
+
+		memset(engine, 0, sizeof(*engine));
+
+		engine->name = strndup(dent->d_name, len - endlen);
+		if (!engine->name)
+			goto err_names;
+
+		engine->busy.config = get_pmu_config(dirfd(d),
+						     engine->name,
+						     "busy");
+		if (engine->busy.config == (uint64_t)-1) {
+			/* Not counted in num_engines yet, free it here. */
+			free((char *)engine->name);
+			goto err_names;
+		}
+
+		engines->num_engines++;
+
+		/* Keep the old pointer valid if growing fails. */
+		grown = realloc(engines, sizeof(*engines) +
+				engines->num_engines * sizeof(struct engine));
+		if (!grown)
+			goto err_names;
+		engines = grown;
+	}
+
+	engines->root = d;
+
+	return engines;
+
+err_names:
+	for (i = 0; i < engines->num_engines; i++)
+		free((char *)engine_ptr(engines, i)->name);
+	closedir(d);
+err_free:
+	free(engines);
+
+	return NULL;
+}
+
+/*
+ * Read the start of @filename into @buf as a NUL-terminated string.
+ *
+ * At most @bufsize - 1 bytes are read. Returns 0 on success and -1 if the
+ * file cannot be opened or yields no data.
+ */
+static int
+filename_to_buf(const char *filename, char *buf, unsigned int bufsize)
+{
+	ssize_t nread;
+	int fd = open(filename, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	nread = read(fd, buf, bufsize - 1);
+	close(fd);
+
+	if (nread >= 1) {
+		buf[nread] = '\0';
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse an unsigned integer from the start of @filename.
+ *
+ * Leading non-digit characters are skipped, so both a bare number and a
+ * "key=value" line are accepted. @base is passed on to strtoull().
+ * Returns 0 if the file cannot be read.
+ */
+static uint64_t filename_to_u64(const char *filename, int base)
+{
+	char buf[64], *b;
+
+	if (filename_to_buf(filename, buf, sizeof(buf)))
+		return 0;
+
+	/*
+	 * Handle both single integer and key=value formats by skipping
+	 * leading non-digits. The cast keeps isdigit() defined for bytes
+	 * that are negative when char is signed.
+	 */
+	b = buf;
+	while (*b && !isdigit((unsigned char)*b))
+		b++;
+
+	return strtoull(b, NULL, base);
+}
+
+/*
+ * Read the decimal number in /sys/devices/power/type; pmu_init() passes it
+ * as the first argument to igt_perf_open(). Yields 0 if unreadable.
+ */
+static uint64_t rapl_type_id(void)
+{
+	const char *path = "/sys/devices/power/type";
+
+	return filename_to_u64(path, 10);
+}
+
+/*
+ * Read the config value of the energy-gpu event from sysfs, with the number
+ * base auto-detected. Yields 0 if unreadable.
+ */
+static uint64_t rapl_gpu_power(void)
+{
+	const char *path = "/sys/devices/power/events/energy-gpu";
+
+	return filename_to_u64(path, 0);
+}
+
+/*
+ * Parse a floating point number from the start of @filename using strtod().
+ * Returns 0 if the file cannot be read.
+ */
+static double filename_to_double(const char *filename)
+{
+	char text[64];
+	int err = filename_to_buf(filename, text, sizeof(text));
+
+	return err ? 0 : strtod(text, NULL);
+}
+
+/* Read the scale factor published for the energy-gpu event (0 if unreadable). */
+static double rapl_gpu_power_scale(void)
+{
+	const char *path = "/sys/devices/power/events/energy-gpu.scale";
+
+	return filename_to_double(path);
+}
+
+/*
+ * Read the scale factor of the rcs0 "queued" event (0 if unreadable);
+ * pmu_init() stores it as the scale for all queue-depth counters.
+ */
+static double i915_qd_scale(void)
+{
+	const char *path = "/sys/devices/i915/events/rcs0-queued.scale";
+
+	return filename_to_double(path);
+}
+
+/*
+ * Open (pmu)->config via perf_i915_open_group(), passing (engines)->fd as the
+ * group argument.
+ *
+ * On success (non-negative fd) the first fd obtained is remembered in
+ * (engines)->fd, the counter is given the next index and num_counters is
+ * incremented. idx is post-incremented, so it must be a modifiable lvalue.
+ *
+ * Evaluates to the fd returned by the open call. Note that engines and pmu
+ * are evaluated more than once.
+ */
+#define __open_pmu(engines, pmu, idx) \
+({ \
+ int fd__; \
+\
+ fd__ = perf_i915_open_group((pmu)->config, (engines)->fd); \
+ if (fd__ >= 0) { \
+ if ((engines)->fd == -1) \
+ (engines)->fd = fd__; \
+ (pmu)->idx = (idx)++; \
+ (engines)->num_counters++; \
+ } \
+\
+ fd__; \
+})
+
+/*
+ * Open every counter the tool samples.
+ *
+ * The four device-wide i915 counters are opened first, followed by six
+ * counters per discovered engine, all through __open_pmu() so that they
+ * receive consecutive indices in the grouped read. The energy-gpu counter is
+ * opened on its own fd afterwards.
+ *
+ * Returns 0 on success and -1 as soon as any counter fails to open. Already
+ * opened fds are not closed on failure; the only caller exits right away.
+ */
+static int pmu_init(struct engines *engines)
+{
+	struct {
+		struct pmu_counter *pmu;
+		uint64_t config;
+	} globals[] = {
+		{ &engines->freq_req, I915_PMU_REQUESTED_FREQUENCY },
+		{ &engines->freq_act, I915_PMU_ACTUAL_FREQUENCY },
+		{ &engines->irq, I915_PMU_INTERRUPTS },
+		{ &engines->rc6, I915_PMU_RC6_RESIDENCY },
+	};
+	unsigned int idx = 0;
+	unsigned int i;
+	int fd;
+
+	engines->fd = -1;
+	engines->num_counters = 0;
+
+	for (i = 0; i < sizeof(globals) / sizeof(globals[0]); i++) {
+		globals[i].pmu->config = globals[i].config;
+		fd = __open_pmu(engines, globals[i].pmu, idx);
+		if (fd < 0)
+			return -1;
+	}
+
+	engines->qd_scale = i915_qd_scale();
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct engine *engine = engine_ptr(engines, i);
+		struct {
+			struct pmu_counter *pmu;
+			const char *counter;
+		} *cnt, counters[] = {
+			{ .pmu = &engine->busy, .counter = "busy" },
+			{ .pmu = &engine->wait, .counter = "wait" },
+			{ .pmu = &engine->sema, .counter = "sema" },
+			{ .pmu = &engine->queued, .counter = "queued" },
+			{ .pmu = &engine->runnable, .counter = "runnable" },
+			{ .pmu = &engine->running, .counter = "running" },
+			{ .pmu = NULL, .counter = NULL },
+		};
+
+		for (cnt = counters; cnt->pmu; cnt++) {
+			/* busy was already resolved during discovery. */
+			if (!cnt->pmu->config)
+				cnt->pmu->config =
+					get_pmu_config(dirfd(engines->root),
+						       engine->name,
+						       cnt->counter);
+			fd = __open_pmu(engines, cnt->pmu, idx);
+			if (fd < 0)
+				return -1;
+		}
+	}
+
+	engines->rapl_scale = rapl_gpu_power_scale();
+	/* A comparison against NAN is always true; isnan() is the real test. */
+	if (!isnan(engines->rapl_scale))
+		engines->rapl_scale *= 1e3; /* from nano to micro */
+	engines->rapl.config = rapl_gpu_power();
+	engines->rapl_fd = igt_perf_open(rapl_type_id(), engines->rapl.config);
+	if (engines->rapl_fd < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read one grouped sample from @fd.
+ *
+ * The read is expected to deliver exactly 2 + @num 64-bit words. Words from
+ * index 2 onwards are copied to @val[0..@num-1]; word 1 is returned (the
+ * caller uses it as a timestamp in nanoseconds). Word 0 is ignored.
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+	ssize_t len;
+
+	/* Keep read() out of assert() so it still runs when NDEBUG is set. */
+	len = read(fd, buf, sizeof(buf));
+	assert(len == (ssize_t)sizeof(buf));
+	(void)len;
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
+
+/*
+ * Turn a counter delta into a scaled rate: (cur - prev) / d / t * s.
+ *
+ * When @s is exactly 100.0 the result is treated as a percentage and is
+ * capped at 100.0.
+ */
+static double pmu_calc(struct pmu_pair *p, double d, double t, double s)
+{
+	double rate = p->cur - p->prev;
+
+	rate = rate / d / t * s;
+
+	return (s == 100.0 && rate > 100.0) ? 100.0 : rate;
+}
+
+/*
+ * Read a single-counter sample from @fd.
+ *
+ * The read is expected to deliver exactly two 64-bit words. The first is
+ * returned as the counter value; the second is stored in *@ts when @ts is
+ * not NULL.
+ */
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+	ssize_t len;
+
+	/* Keep read() out of assert() so it still runs when NDEBUG is set. */
+	len = read(fd, data, sizeof(data));
+	assert(len == (ssize_t)sizeof(data));
+	(void)len;
+
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+/* Read a single-counter sample from @fd and return only the counter value. */
+static uint64_t pmu_read_single(int fd)
+{
+	uint64_t value = __pmu_read_single(fd, NULL);
+
+	return value;
+}
+
+/* Age the current reading of @counter into prev and store @val as current. */
+static void __update_sample(struct pmu_counter *counter, uint64_t val)
+{
+	struct pmu_pair *pair = &counter->val;
+
+	pair->prev = pair->cur;
+	pair->cur = val;
+}
+
+/* Update @counter from its own slot (counter->idx) of the grouped read @val. */
+static void update_sample(struct pmu_counter *counter, uint64_t *val)
+{
+	const uint64_t latest = val[counter->idx];
+
+	__update_sample(counter, latest);
+}
+
+/*
+ * Take one sample of everything: a grouped read for all i915 counters, whose
+ * return value becomes the new timestamp, and a separate read for the
+ * energy-gpu counter. Each counter's previous reading is kept for deltas.
+ */
+static void pmu_sample(struct engines *engines)
+{
+	const int count = engines->num_counters;
+	uint64_t samples[count];
+	struct pmu_counter *globals[] = {
+		&engines->freq_req,
+		&engines->freq_act,
+		&engines->irq,
+		&engines->rc6,
+	};
+	unsigned int n;
+
+	engines->ts.prev = engines->ts.cur;
+	engines->ts.cur = pmu_read_multi(engines->fd, count, samples);
+
+	__update_sample(&engines->rapl, pmu_read_single(engines->rapl_fd));
+
+	for (n = 0; n < sizeof(globals) / sizeof(globals[0]); n++)
+		update_sample(globals[n], samples);
+
+	for (n = 0; n < engines->num_engines; n++) {
+		struct engine *e = engine_ptr(engines, n);
+		struct pmu_counter *per_engine[] = {
+			&e->busy, &e->sema, &e->wait,
+			&e->queued, &e->runnable, &e->running,
+		};
+		unsigned int c;
+
+		for (c = 0; c < sizeof(per_engine) / sizeof(per_engine[0]); c++)
+			update_sample(per_engine[c], samples);
+	}
+}
+
+/* Partial-block glyphs: index i fills i eighths of a character cell. */
+static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+/*
+ * Print a bar of the form "|████▌    |" to stdout.
+ *
+ * @percent is clamped to [0, 100] (NaN counts as 0). @max_len is the total
+ * width including the two '|' delimiters; if it is smaller than 2 only the
+ * delimiters are printed. Without these clamps a negative interior width
+ * would produce a negative index into bars[].
+ */
+static void
+print_percentage_bar(double percent, int max_len)
+{
+	int width = max_len - 2;
+	int bar_len;
+	int i;
+
+	if (width < 0)
+		width = 0;
+
+	if (!(percent > 0.0))
+		percent = 0.0;
+	else if (percent > 100.0)
+		percent = 100.0;
+
+	/* Bar length in eighths of a character cell. */
+	bar_len = percent * (8 * width) / 100.0;
+
+	putchar('|');
+
+	for (i = bar_len; i >= 8; i -= 8)
+		printf("%s", bars[8]);
+	if (i)
+		printf("%s", bars[i]);
+
+	for (i = 0; i < (width - (bar_len + 7) / 8); i++)
+		putchar(' ');
+
+	putchar('|');
+}
+
+/* Refresh period used when -s is not given, in milliseconds. */
+#define DEFAULT_PERIOD_MS (1000)
+
+/* Print the command line help to stdout; @appname is shown in the usage line. */
+static void
+usage(const char *appname)
+{
+	printf("intel_gpu_top - Display a top-like summary of Intel GPU usage\n\n");
+	printf("Usage: %s [parameters]\n\n", appname);
+	printf("\tThe following parameters are optional:\n");
+	printf("\t[-s <samples>] refresh period in ms (default %ums)\n",
+	       DEFAULT_PERIOD_MS);
+	printf("\t[-h] show this help text\n\n");
+}
+
+/*
+ * One step of the load average: move @load towards the new sample @val,
+ * keeping the fraction @exp of the distance between them.
+ */
+static double update_load(double load, double exp, double val)
+{
+	const double distance = load - val;
+
+	return val + exp * distance;
+}
+
+/*
+ * Entry point: parse options, discover engines, open the counters and then
+ * redraw the summary once per refresh period until the process is killed.
+ *
+ * Options:
+ *   -s <ms>  refresh period in milliseconds (must be a positive integer)
+ *   -h       print help and exit
+ *
+ * Returns 1 if engine discovery or PMU setup fails; otherwise never returns.
+ */
+int main(int argc, char **argv)
+{
+	unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+	const double load_period[NUM_LOADS] = { 1.0, 30.0, 900.0 };
+	struct engines *engines;
+	int con_w = -1, con_h = -1;
+	struct winsize ws;
+	unsigned int i;
+	double period;
+	int ret, ch;
+
+	/* Parse options */
+	while ((ch = getopt(argc, argv, "s:h")) != -1) {
+		switch (ch) {
+		case 's': {
+			unsigned long ms;
+			char *end;
+
+			/*
+			 * Reject garbage, zero (would spin without sleeping)
+			 * and anything that overflows the microsecond value.
+			 */
+			errno = 0;
+			ms = strtoul(optarg, &end, 10);
+			if (errno || end == optarg || *end != '\0' ||
+			    ms == 0 || ms > ~0u / 1000) {
+				fprintf(stderr,
+					"Invalid refresh period '%s'!\n",
+					optarg);
+				usage(argv[0]);
+				exit(1);
+			}
+			period_us = ms * 1000;
+			break;
+		}
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			fprintf(stderr, "Invalid option %c!\n", (char)optopt);
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	/* Get terminal size. */
+	if (ioctl(0, TIOCGWINSZ, &ws) != -1) {
+		con_w = ws.ws_col;
+		con_h = ws.ws_row;
+	}
+
+	engines = discover_engines();
+	if (!engines) {
+		fprintf(stderr, "Failed to detect engines!\n");
+		return 1;
+	}
+
+	ret = pmu_init(engines);
+	if (ret) {
+		fprintf(stderr, "Failed to initialize PMU!\n");
+		return 1;
+	}
+
+	/* Load average setup. */
+	period = (double)period_us / 1e6;
+	for (i = 0; i < NUM_LOADS; i++)
+		engines->load_exp[i] = exp(-period / load_period[i]);
+
+	/* Prime the "previous" readings so the first frame shows real deltas. */
+	pmu_sample(engines);
+
+	for (;;) {
+		double t, freq[2], irq, rc6, power;
+		double qd = 0;
+		int lines = 0;
+		unsigned int j;
+
+		usleep(period_us);
+
+		pmu_sample(engines);
+		/* Elapsed time between the two samples, in seconds. */
+		t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
+
+		/* Cursor home + clear to end of screen. */
+		printf("\033[H\033[J");
+
+		freq[0] = pmu_calc(&engines->freq_req.val, 1.0, t, 1);
+		freq[1] = pmu_calc(&engines->freq_act.val, 1.0, t, 1);
+		irq = pmu_calc(&engines->irq.val, 1.0, t, 1);
+		rc6 = pmu_calc(&engines->rc6.val, 1e9, t, 100);
+		power = pmu_calc(&engines->rapl.val, 1.0, t,
+				 engines->rapl_scale);
+
+		for (i = 0; i < engines->num_engines; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+
+			engine->qd[0] = pmu_calc(&engine->queued.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[1] = pmu_calc(&engine->runnable.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[2] = pmu_calc(&engine->running.val, 1, t,
+						 engines->qd_scale);
+
+			qd += engine->qd[1] + engine->qd[2];
+
+			for (j = 0; j < NUM_LOADS; j++) {
+				engine->load_avg[j] =
+					update_load(engine->load_avg[j],
+						    engines->load_exp[j],
+						    engine->qd[1] +
+						    engine->qd[2]);
+			}
+		}
+
+		for (j = 0; j < NUM_LOADS; j++) {
+			engines->load_avg[j] =
+				update_load(engines->load_avg[j],
+					    engines->load_exp[j],
+					    qd);
+		}
+
+		printf("intel-gpu-top - load avg %5.2f, %5.2f, %5.2f; "
+		       "%4.0f/%4.0f MHz; %3.0f%% RC6; %6.0fmW; %8.0f irqs/s\n",
+		       engines->load_avg[0],
+		       engines->load_avg[1],
+		       engines->load_avg[2],
+		       freq[0], freq[1],
+		       rc6, power, irq);
+		lines++;
+
+		printf("\n");
+		lines++;
+
+		for (i = 0; i < engines->num_engines && lines < con_h; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+			double val[2];
+			char buf[128];
+			int bar_w;
+			int len;
+
+			val[0] = pmu_calc(&engine->wait.val, 1e9, t, 100);
+			val[1] = pmu_calc(&engine->sema.val, 1e9, t, 100);
+			len = snprintf(buf, sizeof(buf),
+				       "%6.2f%% wait, %6.2f%% sema",
+				       val[0], val[1]);
+
+			val[0] = pmu_calc(&engine->busy.val, 1e9, t, 100);
+			len += printf("%8s %6.2f%% (%5.2f/%5.2f/%5.2f) ",
+				      engine->name,
+				      val[0],
+				      engine->qd[0],
+				      engine->qd[1],
+				      engine->qd[2]);
+
+			/*
+			 * The bar gets whatever width the text leaves over.
+			 * Use signed arithmetic and a floor of 2 (just the
+			 * delimiters) so a narrow terminal cannot wrap the
+			 * width around.
+			 */
+			bar_w = con_w - 1 - len;
+			if (bar_w < 2)
+				bar_w = 2;
+			print_percentage_bar(val[0], bar_w);
+
+			printf("%s\n", buf);
+
+			lines++;
+		}
+
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/meson.build b/tools/meson.build
index ebce4e305d00..36038f7a9d22 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -23,6 +23,7 @@ tools_progs = [
'intel_gpu_frequency',
'intel_firmware_decode',
'intel_gpu_time',
+ 'intel_gpu_top',
'intel_legacy_top',
'intel_gtt',
'intel_guc_logger',
--
2.14.1
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx