On 01.02.2018 17:15, Yury Norov wrote:
On Thu, Feb 01, 2018 at 02:57:59PM +0100, Tomasz Nowicki wrote:
Hi Christoffer,

I created a simple module for the VM kernel. It spins on the PSCI version
hypercall to measure the base exit cost, as you suggested. I also measured
the CPU cycles for each loop, and here are my results:

My setup:
1-socket ThunderX2 running VM - 1VCPU

Tested baselines:
a) host kernel v4.15-rc3 and VM kernel v4.15-rc3
b) host kernel v4.15-rc3 + vhe-optimize-v3-with-fixes and VM kernel v4.15-rc3

The module was loaded from the VM, and the results are presented in [%]
relative to the average CPU cycles spent on the PSCI version hypercall for the
vanilla VHE host kernel v4.15-rc3:

              VHE  |  nVHE
baseline a)  100% |  130%
baseline b)  36%  |  123%

So I can confirm a significant performance improvement, especially for the VHE
case. Additionally, I ran network throughput tests with vhost-net, but in that
case I saw no differences.

Hi Tomasz,

Can you share your test?


#include <linux/arm-smccc.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/psci.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <uapi/linux/psci.h>

/* A timing sample is taken on every loop iteration that is a multiple of this. */
#define SAMPLE_N        (10000UL)
/* Initial value of the sample countdown (`out`) in kvm_bench_kthread(). */
#define SAMPLES         (500)
/* CPU number passed to kthread_bind() for the benchmark thread. */
#define CPU_PINNED      (10)

/*
 * The benchmark kthread, assigned in kvm_bench_init(). kvm_bench_kthread()
 * resets it to NULL, and kvm_bench_cleanup() tests it for NULL.
 */
static struct task_struct *kvm_bench_task;

static unsigned long __invoke_psci_fn_hvc(unsigned long function_id,
                        unsigned long arg0, unsigned long arg1,
                        unsigned long arg2)
        struct arm_smccc_res res;

        arm_smccc_hvc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);
        return res.a0;

static u32 psci_get_version(void)
        return __invoke_psci_fn_hvc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);

/*
 * Read the arm64 virtual counter register (CNTVCT_EL0).
 *
 * NOTE(review): CNTVCT_EL0 advances at the generic timer frequency, not at
 * the CPU clock, so the differences logged by the benchmark are counter
 * ticks rather than true CPU cycles - confirm this is the intended unit.
 *
 * Returns the current counter value.
 */
static inline u64 get_cycles_custom(void)
{
        register u64 c;

        __asm__ volatile("mrs %0, cntvct_el0" : "=r"(c));
        return c;
}

static int kvm_bench_kthread(void *none)
        int test_iter, out = SAMPLES;
        u64 time_before, time;
        u32 ver = psci_get_version();

printk(KERN_INFO "Starting kvm exit cost test, using PSCI get version hypercall");
        printk(KERN_INFO "Obtained PSCIv%d.%d\n", PSCI_VERSION_MAJOR(ver),

        for (test_iter = 0;; test_iter++) {
                if (!(test_iter % SAMPLE_N)) {
                        time_before = get_cycles_custom();


                if (!(test_iter % SAMPLE_N)) {
                        while (!out--) {
                                kvm_bench_task = NULL;
                        time = get_cycles_custom() - time_before;
                        printk(KERN_INFO "iter takes %llu cycles. \n", time);
                        if (kthread_should_stop())

        return 0;

/*
 * Module init: create the benchmark kthread, bind it to CPU_PINNED and
 * start it.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * kthread_create() on failure.
 *
 * NOTE(review): kthread_bind() is given CPU_PINNED unconditionally; confirm
 * that CPU exists and is online in the guest being measured.
 */
static int __init kvm_bench_init(void)
{
        int err;

        printk(KERN_INFO "KVM exit cost benchmark\n");

        kvm_bench_task = kthread_create(kvm_bench_kthread, NULL, "kvm_test");
        if (IS_ERR(kvm_bench_task)) {
                printk(KERN_INFO "Unable to start thread.\n");
                err = PTR_ERR(kvm_bench_task);
                /* Do not leave an error pointer behind in the global. */
                kvm_bench_task = NULL;
                return err;
        }

        /* Bind while the thread is still stopped, then let it run. */
        kthread_bind(kvm_bench_task, CPU_PINNED);
        /* kthread_create() leaves the thread stopped until it is woken. */
        wake_up_process(kvm_bench_task);
        return 0;
}

/*
 * Module exit: stop the benchmark thread if it is still registered.
 *
 * kvm_bench_kthread() sets kvm_bench_task to NULL before finishing on its
 * own, so a NULL pointer here means there is nothing left to stop.
 */
static void __exit kvm_bench_cleanup(void)
{
        printk(KERN_INFO "KVM benchmark cleaning up\n");
        if (kvm_bench_task)
                kthread_stop(kvm_bench_task);
}


kvmarm mailing list

Reply via email to