On Thu, Apr 11, 2024 at 02:14:34PM +0200, Anthony Harivel wrote:
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index fad9a7e8ff30..37f68c496807 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -544,4 +544,6 @@ uint32_t kvm_dirty_ring_size(void); > * reported for the VM. > */ > bool kvm_hwpoisoned_mem(void); > + > +bool kvm_is_rapl_feat_enable(CPUState *cs); > #endif > diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h > index 882e37e12c5b..8dbeda473c6c 100644 > --- a/include/sysemu/kvm_int.h > +++ b/include/sysemu/kvm_int.h > @@ -14,6 +14,9 @@ > #include "qemu/accel.h" > #include "qemu/queue.h" > #include "sysemu/kvm.h" > +#include "hw/boards.h" > +#include "hw/i386/topology.h" > +#include "io/channel-socket.h" > > typedef struct KVMSlot > { > @@ -48,6 +51,34 @@ typedef struct KVMMemoryListener { > > #define KVM_MSI_HASHTAB_SIZE 256 > > +typedef struct KVMHostTopoInfo { > + /* Number of package on the Host */ > + unsigned int maxpkgs; > + /* Number of cpus on the Host */ > + unsigned int maxcpus; > + /* Number of cpus on each different package */ > + unsigned int *pkg_cpu_count; > + /* Each package can have different maxticks */ > + unsigned int *maxticks; > +} KVMHostTopoInfo; > + > +struct KVMMsrEnergy { > + pid_t pid; > + bool enable; > + char *socket_path; > + QIOChannelSocket *sioc; > + QemuThread msr_thr; > + unsigned int vcpus; > + unsigned int vsockets; Add 'guest_' prefix on to these two, to make it clearer > + X86CPUTopoInfo topo_info; Lets call this 'guest_topo' too, to make it more explicitly distinguished from the next 'host_topo' field. > + KVMHostTopoInfo host_topo; > + const CPUArchIdList *cpu_list; This name choice has an unfortunate side effect. cpu.h has a '#define cpu_list x86_cpu_list' for the purposes of selecting the target specific cpu_list function. Unfortunately that macro affects your field name here, which is why the code is wierdly able to set an 'x86_cpu_list' field despite you having declared it 'cpu_list'. I'd suggest perhaps calling this field 'guest_cpus' to avoid this confusing side-effect, and again also making clear that this is tracking guest, not host, CPUs. > + uint64_t *msr_value; > + uint64_t msr_unit; > + uint64_t msr_limit; > + uint64_t msr_info; > +}; > + > enum KVMDirtyRingReaperState { > KVM_DIRTY_RING_REAPER_NONE = 0, > /* The reaper is sleeping */ > @@ -114,6 +145,7 @@ struct KVMState > bool kvm_dirty_ring_with_bitmap; > uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ > struct KVMDirtyRingReaper reaper; > + struct KVMMsrEnergy msr_energy; > NotifyVmexitOption notify_vmexit; > uint32_t notify_window; > uint32_t xen_version; > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index e68cbe929302..3de69caa229e 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -2513,6 +2565,339 @@ static void register_smram_listener(Notifier *n, void > *unused) > &smram_address_space, 1, "kvm-smram"); > } > > +static void *kvm_msr_energy_thread(void *data) > +{ > + KVMState *s = data; > + struct KVMMsrEnergy *vmsr = &s->msr_energy; > + > + g_autofree package_energy_stat *pkg_stat = NULL; > + g_autofree thread_stat *thd_stat = NULL; > + g_autofree pid_t *thread_ids = NULL; > + g_autofree CPUState *cpu = NULL; > + g_autofree unsigned int *vpkgs_energy_stat = NULL; > + unsigned int num_threads = 0; > + unsigned int tmp_num_threads = 0; > + > + X86CPUTopoIDs topo_ids; > + > + rcu_register_thread(); > + > + /* Allocate memory for each package energy status */ > + pkg_stat = g_new0(package_energy_stat, vmsr->host_topo.maxpkgs); > + > + /* Allocate memory for thread stats */ > + thd_stat = g_new0(thread_stat, 1); > + > + /* Allocate memory for holding virtual package energy counter */ > + vpkgs_energy_stat = g_new0(unsigned int, vmsr->vsockets); > + > + /* Populate the max tick of each packages */ > + for (int i = 0; i <= vmsr->host_topo.maxpkgs; i++) { The 'maxticks' array is allocated as 'maxpkgs' in size, so this limit condition needs to be '<' not '<=', to avoid an out of bounds write. > + /* > + * Max numbers of ticks per package > + * Time in second * Number of ticks/second * Number of cores/package > + * ex: 100 ticks/second/CPU, 12 CPUs per Package gives 1200 ticks max > + */ > + vmsr->host_topo.maxticks[i] = (MSR_ENERGY_THREAD_SLEEP_US / 1000000) > + * sysconf(_SC_CLK_TCK) > + * vmsr->host_topo.pkg_cpu_count[i]; > + } > + > + while (true) { > + /* Get all qemu threads id */ > + thread_ids = vmsr_get_thread_ids(vmsr->pid, &num_threads); Since you're re-allocating thread_ids on each loop iteration, you need to free it, not overwrite the pointer. This is easily achieved by moving the earlier declaration inside the loop body. ie g_autofree pid_t *thread_ids = thread_ids = vmsr_get_thread_ids(vmsr->pid, &num_threads); > + > + if (thread_ids == NULL) { > + goto clean; > + } > + > + if (tmp_num_threads < num_threads) { > + thd_stat = g_renew(thread_stat, thd_stat, num_threads); > + } > + > + tmp_num_threads = num_threads; You could eliminate 'tmp_num_threads' and just unconditionally call g_renew on each loop iteration. It'll effectively be a no-op if the size hasn't changed since the last iteration, and won't hurt performance in any measurable way. > + > + /* Populate all the thread stats */ > + for (int i = 0; i < num_threads; i++) { > + thd_stat[i].utime = g_new0(unsigned long long, 2); > + thd_stat[i].stime = g_new0(unsigned long long, 2); > + thd_stat[i].thread_id = thread_ids[i]; > + vmsr_read_thread_stat(vmsr->pid, > + thd_stat[i].thread_id, > + thd_stat[i].utime, > + thd_stat[i].stime, > + &thd_stat[i].cpu_id); > + thd_stat[i].pkg_id = > + vmsr_get_physical_package_id(thd_stat[i].cpu_id); > + } > + > + /* Retrieve all packages power plane energy counter */ > + for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) { > + for (int j = 0; j < num_threads; j++) { > + /* > + * Use the first thread we found that ran on the CPU > + * of the package to read the packages energy counter > + */ > + if (thd_stat[j].pkg_id == i) { > + pkg_stat[i].e_start = > + vmsr_read_msr(MSR_PKG_ENERGY_STATUS, > + thd_stat[j].cpu_id, > + thd_stat[j].thread_id, > + s->msr_energy.sioc); > + break; > + } > + } > + } > + > + /* Sleep a short period while the other threads are working */ > + usleep(MSR_ENERGY_THREAD_SLEEP_US); > + > + /* > + * Retrieve all packages power plane energy counter > + * Calculate the delta of all packages > + */ > + for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) { > + for (int j = 0; j < num_threads; j++) { > + /* > + * Use the first thread we found that ran on the CPU > + * of the package to read the packages energy counter > + */ > + if (thd_stat[j].pkg_id == i) { > + pkg_stat[i].e_end = > + vmsr_read_msr(MSR_PKG_ENERGY_STATUS, > + thd_stat[j].cpu_id, > + thd_stat[j].thread_id, > + s->msr_energy.sioc); > + /* > + * Prevent the case we have migrate the VM > + * during the sleep period or any other cases > + * were energy counter might be lower after > + * the sleep period. > + */ > + if (pkg_stat[i].e_end > pkg_stat[i].e_start) { > + pkg_stat[i].e_delta = > + pkg_stat[i].e_end - pkg_stat[i].e_start; > + } else { > + pkg_stat[i].e_delta = 0; > + } > + break; > + } > + } > + } > + > + /* Delta of ticks spend by each thread between the sample */ > + for (int i = 0; i < num_threads; i++) { > + vmsr_read_thread_stat(vmsr->pid, > + thd_stat[i].thread_id, > + thd_stat[i].utime, > + thd_stat[i].stime, > + &thd_stat[i].cpu_id); > + > + if (vmsr->pid < 0) { > + /* > + * We don't count the dead thread > + * i.e threads that existed before the sleep > + * and not anymore > + */ > + thd_stat[i].delta_ticks = 0; > + } else { > + vmsr_delta_ticks(thd_stat, i); > + } > + } > + > + /* > + * Identify the vcpu threads > + * Calculate the number of vcpu per package > + */ > + CPU_FOREACH(cpu) { > + for (int i = 0; i < num_threads; i++) { > + if (cpu->thread_id == thd_stat[i].thread_id) { > + thd_stat[i].is_vcpu = true; > + thd_stat[i].vcpu_id = cpu->cpu_index; > + pkg_stat[thd_stat[i].pkg_id].nb_vcpu++; > + thd_stat[i].acpi_id = kvm_arch_vcpu_id(cpu); > + break; > + } > + } > + } > + > + /* Retrieve the virtual package number of each vCPU */ > + for (int i = 0; i < vmsr->x86_cpu_list->len; i++) { > + for (int j = 0; j < num_threads; j++) { > + if ((thd_stat[j].acpi_id == > vmsr->x86_cpu_list->cpus[i].arch_id) > + && (thd_stat[j].is_vcpu == true)) { > + x86_topo_ids_from_apicid(thd_stat[j].acpi_id, > + &vmsr->topo_info, &topo_ids); > + thd_stat[j].vpkg_id = topo_ids.pkg_id; > + } > + } > + } > + > + /* Calculate the total energy of all non-vCPU thread */ > + for (int i = 0; i < num_threads; i++) { > + double temp; Move this inside the 'if', and assign to it at time of declaration. > + if ((thd_stat[i].is_vcpu != true) && > + (thd_stat[i].delta_ticks > 0)) { > + temp = vmsr_get_ratio(pkg_stat[thd_stat[i].pkg_id].e_delta, > + thd_stat[i].delta_ticks, > + vmsr->host_topo.maxticks[thd_stat[i].pkg_id]); > + pkg_stat[thd_stat[i].pkg_id].e_ratio > + += (uint64_t)lround(temp); > + } > + } > + > + /* Calculate the ratio per non-vCPU thread of each package */ > + for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) { > + if (pkg_stat[i].nb_vcpu > 0) { > + pkg_stat[i].e_ratio = pkg_stat[i].e_ratio / > pkg_stat[i].nb_vcpu; > + } > + } > + > + /* > + * Calculate the energy for each Package: > + * Energy Package = sum of each vCPU energy that belongs to the > package > + */ > + for (int i = 0; i < num_threads; i++) { > + double temp; Aain can move inside the 'if' > + > + if ((thd_stat[i].is_vcpu == true) && \ > + (thd_stat[i].delta_ticks > 0)) { > + temp = vmsr_get_ratio(pkg_stat[thd_stat[i].pkg_id].e_delta, > + thd_stat[i].delta_ticks, > + vmsr->host_topo.maxticks[thd_stat[i].pkg_id]); > + vpkgs_energy_stat[thd_stat[i].vpkg_id] += > + (uint64_t)lround(temp); > + vpkgs_energy_stat[thd_stat[i].vpkg_id] += > + pkg_stat[thd_stat[i].pkg_id].e_ratio; > + } > + } > + > + /* > + * Finally populate the vmsr register of each vCPU with the total > + * package value to emulate the real hardware where each CPU return > the > + * value of the package it belongs. > + */ > + for (int i = 0; i < num_threads; i++) { > + if ((thd_stat[i].is_vcpu == true) && \ > + (thd_stat[i].delta_ticks > 0)) { > + vmsr->msr_value[thd_stat[i].vcpu_id] = \ > + > vpkgs_energy_stat[thd_stat[i].vpkg_id]; > + } > + } > + > + /* Freeing memory before zeroing the pointer */ > + for (int i = 0; i < num_threads; i++) { > + g_free(thd_stat[i].utime); > + g_free(thd_stat[i].stime); > + } > + /* Zeroing the reused memory with g_renew */ > + memset(thd_stat, 0, num_threads * sizeof(thread_stat)); This memset() should be done immediately after the 'g_renew' call, since if g_renew enlarged the array, it won't have zero'd the newly added elements. > + memset(thread_ids, 0, sizeof(pid_t)); We need to be free'ing thread_ids, not zero'ing it. This memset can just be removed entirely if you follow my earlier suggestion to rely on g_autofree and declare thread_ids inside the loop body. > + } > + > +clean: > + rcu_unregister_thread(); > + return NULL; > +} > + > +static int kvm_msr_energy_thread_init(KVMState *s, MachineState *ms) > +{ > + struct KVMMsrEnergy *r = &s->msr_energy; > + int ret = 0; > + > + /* > + * Sanity check > + * 1. Host cpu must be Intel cpu > + * 2. RAPL must be enabled on the Host > + */ > + if (is_host_cpu_intel()) { > + error_report("The RAPL feature can only be enabled on hosts\ > + with Intel CPU models"); > + ret = 1; > + goto out; > + } > + > + if (!is_rapl_enabled()) { > + ret = 1; > + goto out; > + } > + > + /* Retrieve the virtual topology */ > + vmsr_init_topo_info(&r->topo_info, ms); > + > + /* Retrieve the number of vcpu */ > + r->vcpus = ms->smp.cpus; > + > + /* Retrieve the number of virtual sockets */ > + r->vsockets = ms->smp.sockets; > + > + /* Allocate register memory (MSR_PKG_STATUS) for each vcpu */ > + r->msr_value = g_new0(uint64_t, r->vcpus); > + > + /* Retrieve the CPUArchIDlist */ > + r->x86_cpu_list = x86_possible_cpu_arch_ids(ms); > + > + /* Max number of cpus on the Host */ > + r->host_topo.maxcpus = vmsr_get_maxcpus(); > + if (r->host_topo.maxcpus == 0) { > + error_report("host max cpus = 0"); > + ret = 1; > + goto out; > + } > + > + /* Max number of packages on the host */ > + r->host_topo.maxpkgs = > vmsr_get_max_physical_package(r->host_topo.maxcpus); > + if (r->host_topo.maxpkgs == 0) { > + error_report("host max pkgs = 0"); > + ret = 1; > + goto out; > + } > + > + /* Allocate memory for each package on the host */ > + r->host_topo.pkg_cpu_count = g_new0(unsigned int, r->host_topo.maxpkgs); > + r->host_topo.maxticks = g_new0(unsigned int, r->host_topo.maxpkgs); > + > + vmsr_count_cpus_per_package(r->host_topo.pkg_cpu_count, > + r->host_topo.maxpkgs); > + for (int i = 0; i < r->host_topo.maxpkgs; i++) { > + if (r->host_topo.pkg_cpu_count[i] == 0) { > + error_report("cpu per packages = 0 on package_%d", i); > + ret = 1; > + goto out; > + } > + } > + > + /* Get QEMU PID*/ > + r->pid = getpid(); > + > + /* Compute the socket path if necessary */ > + if (s->msr_energy.socket_path == NULL) { > + s->msr_energy.socket_path = vmsr_compute_default_paths(); > + } > + > + /* Open socket with vmsr helper */ > + s->msr_energy.sioc = vmsr_open_socket(s->msr_energy.socket_path); Need to detect & report error if opening the socket failed. > + > + /* Those MSR values should not change */ > + r->msr_unit = vmsr_read_msr(MSR_RAPL_POWER_UNIT, 0, r->pid, > + s->msr_energy.sioc); > + r->msr_limit = vmsr_read_msr(MSR_PKG_POWER_LIMIT, 0, r->pid, > + s->msr_energy.sioc); > + r->msr_info = vmsr_read_msr(MSR_PKG_POWER_INFO, 0, r->pid, > + s->msr_energy.sioc); > + if (r->msr_unit == 0 || r->msr_limit == 0 || r->msr_info == 0) { > + error_report("can't read any virtual msr"); > + ret = 1; > + goto out; > + } > + > + qemu_thread_create(&r->msr_thr, "kvm-msr", > + kvm_msr_energy_thread, > + s, QEMU_THREAD_JOINABLE); > +out: > + return ret; > +} > + > int kvm_arch_get_default_type(MachineState *ms) > { > return 0; > diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c > new file mode 100644 > index 000000000000..934d14c4305f > --- /dev/null > +++ b/target/i386/kvm/vmsr_energy.c > @@ -0,0 +1,335 @@ > +/* > + * QEMU KVM support -- x86 virtual RAPL msr > + * > + * Copyright 2024 Red Hat, Inc. 2024 > + * > + * Author: > + * Anthony Harivel <ahari...@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/error-report.h" > +#include "vmsr_energy.h" > +#include "io/channel.h" > +#include "io/channel-socket.h" > +#include "hw/boards.h" > +#include "cpu.h" > +#include "host-cpu.h" > + > +QIOChannelSocket *vmsr_open_socket(const char *path) > +{ > + g_autofree char *socket_path = NULL; > + > + socket_path = g_strdup(path); Assign this at time of declaring socket_path > + > + SocketAddress saddr = { > + .type = SOCKET_ADDRESS_TYPE_UNIX, > + .u.q_unix.path = socket_path > + }; > + > + QIOChannelSocket *sioc = qio_channel_socket_new(); > + Error *local_err = NULL; > + > + qio_channel_set_name(QIO_CHANNEL(sioc), "vmsr-helper"); > + qio_channel_socket_connect_sync(sioc, > + &saddr, > + &local_err); > + if (local_err) { > + /* Close socket. */ > + qio_channel_close(QIO_CHANNEL(sioc), NULL); > + object_unref(OBJECT(sioc)); You need a 'return NULL' here, otherwise we're returning a free'd pointer to the caller. Aso we need to report the error. > + } > + > + return sioc; Indent misaligned. > +} > + > +uint64_t vmsr_read_msr(uint32_t reg, uint32_t cpu_id, uint32_t tid, > + QIOChannelSocket *sioc) > +{ > + uint64_t data = 0; > + int r = 0; > + Error *local_err = NULL; > + g_autofree uint32_t buffer[3]; g_autofree is wrong for a stack allocated array, just remove it. > + /* > + * Send the required arguments: > + * 1. RAPL MSR register to read > + * 2. On which CPU ID > + * 3. From which vCPU (Thread ID) > + */ > + buffer[0] = reg; > + buffer[1] = cpu_id; > + buffer[2] = tid; > + > + r = qio_channel_write_all(QIO_CHANNEL(sioc), > + (char *)buffer, sizeof(buffer), > + &local_err); > + if (r < 0) { > + goto out_close; > + } > + > + r = qio_channel_read_all(QIO_CHANNEL(sioc), > + (char *)data, sizeof(data), > + &local_err); > + if (r < 0) { > + data = 0; > + goto out_close; > + } > + > +out_close: > + return data; > +} > +/* Retrieve the max number of physical cpu on the host */ > +unsigned int vmsr_get_maxcpus(void) > +{ > + GDir *dir; > + const gchar *entry_name; > + unsigned int cpu_count = 0; > + const char *path = "/sys/devices/system/cpu/"; > + > + dir = g_dir_open(path, 0, NULL); > + if (dir == NULL) { > + error_report("Unable to open cpu directory"); Slightly more friendly to include the path in the error message, and also the error that glib reports.eg g_autoptr(GError) gerr = NULL; GDir *dir = g_dir_open(path, 0, &gerr); if (dir == NULL) { error_report("Unable to open cpu directory %s: %s", path, err->message); > + return -1; > + } > + > + while ((entry_name = g_dir_read_name(dir)) != NULL) { > + if (g_ascii_strncasecmp(entry_name, "cpu", 3) == 0 && > + isdigit(entry_name[3])) { > + cpu_count++; > + } > + } > + > + g_dir_close(dir); > + > + return cpu_count; > +} > + > +/* Count the number of physical cpu on each packages */ > +unsigned int vmsr_count_cpus_per_package(unsigned int *package_count, > + unsigned int max_pkgs) > +{ > + g_autofree char *file_contents = NULL; > + g_autofree char *path = NULL; > + gsize length; > + > + /* Iterate over cpus and count cpus in each package */ > + for (int cpu_id = 0; ; cpu_id++) { > + path = g_build_filename( > + g_strdup_printf("/sys/devices/system/cpu/cpu%d/" > + "topology/physical_package_id", cpu_id), NULL); The result of 'g_strdup_printf' needs assigning to a variable marked with 'g_autofree', before being passed to g_build_filename to avoid a mem leak. > + > + if (!g_file_get_contents(path, &file_contents, &length, NULL)) { > + break; /* No more cpus */ > + } > + > + /* Get the physical package ID for this CPU */ > + int package_id = atoi(file_contents); > + > + /* Check if the package ID is within the known number of packages */ > + if (package_id >= 0 && package_id < max_pkgs) { > + /* If yes, count the cpu for this package*/ > + package_count[package_id]++; > + } > + } > + > + return 0; > +} > +/* Read the scheduled time for a given thread of a give pid */ > +void vmsr_read_thread_stat(pid_t pid, > + unsigned int thread_id, > + unsigned long long *utime, > + unsigned long long *stime, > + unsigned int *cpu_id) > +{ > + g_autofree char *path = NULL; > + > + path = g_build_filename(g_strdup_printf("/proc/%u/task/%d/stat", pid, \ > + thread_id), NULL); Again need to assign g_strdup_printf result to a g_autofree marked variable to avoid memleak. > + > + FILE *file = fopen(path, "r"); > + if (file == NULL) { > + pid = -1; > + return; > + } > + > + if (fscanf(file, "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u > %*u" > + " %llu %llu %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u" > + " %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %*u %*u %u", > + utime, stime, cpu_id) != 3) > + { > + pid = -1; > + return; > + } > + > + fclose(file); > + return; > +} > + > +/* Read QEMU stat task folder to retrieve all QEMU threads ID */ > +pid_t *vmsr_get_thread_ids(pid_t pid, unsigned int *num_threads) > +{ > + g_autofree char *path = g_build_filename("/proc", > + g_strdup_printf("%d/task", pid), NULL); Another memleak of g_strdup_printf()result > + > + DIR *dir = opendir(path); > + if (dir == NULL) { > + error_report("Error opening /proc/qemu/task"); > + return NULL; > + } > + > + pid_t *thread_ids = NULL; > + unsigned int thread_count = 0; > + > + g_autofree struct dirent *ent = NULL; > + while ((ent = readdir(dir)) != NULL) { > + if (ent->d_name[0] == '.') { > + continue; > + } > + pid_t tid = atoi(ent->d_name); > + if (pid != tid) { > + thread_ids = g_renew(pid_t, thread_ids, (thread_count + 1)); > + thread_ids[thread_count] = tid; > + thread_count++; > + } > + } > + > + closedir(dir); > + > + *num_threads = thread_count; > + return thread_ids; > +} > diff --git a/target/i386/kvm/vmsr_energy.h b/target/i386/kvm/vmsr_energy.h > new file mode 100644 > index 000000000000..a04114179346 > --- /dev/null > +++ b/target/i386/kvm/vmsr_energy.h > @@ -0,0 +1,99 @@ > +/* > + * QEMU KVM support -- x86 virtual energy-related MSR. > + * > + * Copyright 2024 Red Hat, Inc. 2024 > + * > + * Author: > + * Anthony Harivel <ahari...@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef VMSR_ENERGY_H > +#define VMSR_ENERGY_H > + > +#include <stdint.h> > +#include "qemu/osdep.h" > +#include "io/channel-socket.h" > +#include "hw/i386/topology.h" > + > +/* > + * Define the interval time in micro seconds between 2 samples of > + * energy related MSRs > + */ > +#define MSR_ENERGY_THREAD_SLEEP_US 1000000.0 > + > +/* > + * Thread statistic > + * @ thread_id: TID (thread ID) > + * @ is_vcpu: true if TID is vCPU thread > + * @ cpu_id: CPU number last executed on > + * @ pkg_id: package number of the CPU > + * @ vcpu_id: vCPU ID > + * @ vpkg: virtual package number > + * @ acpi_id: APIC id of the vCPU > + * @ utime: amount of clock ticks the thread > + * has been scheduled in User mode > + * @ stime: amount of clock ticks the thread > + * has been scheduled in System mode > + * @ delta_ticks: delta of utime+stime between > + * the two samples (before/after sleep) > + */ > +struct thread_stat { Suggest 'vmsr_thread_stat' to better namespace > + unsigned int thread_id; > + bool is_vcpu; > + unsigned int cpu_id; > + unsigned int pkg_id; > + unsigned int vpkg_id; > + unsigned int vcpu_id; > + unsigned long acpi_id; > + unsigned long long *utime; > + unsigned long long *stime; > + unsigned long long delta_ticks; > +}; > + > +/* > + * Package statistic > + * @ e_start: package energy counter before the sleep > + * @ e_end: package energy counter after the sleep > + * @ e_delta: delta of package energy counter > + * @ e_ratio: store the energy ratio of non-vCPU thread > + * @ nb_vcpu: number of vCPU running on this package > + */ > +struct package_energy_stat { And 'vmsr_package_energy_stat' too > + uint64_t e_start; > + uint64_t e_end; > + uint64_t e_delta; > + uint64_t e_ratio; > + unsigned int nb_vcpu; > +}; > + > +typedef struct thread_stat thread_stat; > +typedef struct package_energy_stat package_energy_stat; > + > +char *vmsr_compute_default_paths(void); > +void vmsr_read_thread_stat(pid_t pid, > + unsigned int thread_id, > + unsigned long long *utime, > + unsigned long long *stime, > + unsigned int *cpu_id); > + > +QIOChannelSocket *vmsr_open_socket(const char *path); > +uint64_t vmsr_read_msr(uint32_t reg, uint32_t cpu_id, > + uint32_t tid, QIOChannelSocket *sioc); > +void vmsr_delta_ticks(thread_stat *thd_stat, int i); > +unsigned int vmsr_get_maxcpus(void); > +unsigned int vmsr_get_max_physical_package(unsigned int max_cpus); > +unsigned int vmsr_count_cpus_per_package(unsigned int *package_count, > + unsigned int max_pkgs); > +int vmsr_get_physical_package_id(int cpu_id); > +pid_t *vmsr_get_thread_ids(pid_t pid, unsigned int *num_threads); > +double vmsr_get_ratio(uint64_t e_delta, > + unsigned long long delta_ticks, > + unsigned int maxticks); > +void vmsr_init_topo_info(X86CPUTopoInfo *topo_info, const MachineState *ms); > +bool is_host_cpu_intel(void); > +int is_rapl_enabled(void); > +#endif /* VMSR_ENERGY_H */ Bearing in mind the other feedback from Intel about future CPUs not using MSRs for this interface, I wonder if a rename is worth doing. eg call the file 'rapl.h', and the APIs / structs all have a 'rapl_' name prefix, instead of 'vmsr'. Then use 'rapl_vmsr_' as a name prefix on the ones that are specifically tied to the MSR based impl. In future we can have 'rapl_tpmi' as the prefix for the new interface. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|