On 08/01/2013 05:52 AM, Andreas Färber wrote: > Hi, > > Am 17.07.2013 08:37, schrieb Alexey Kardashevskiy: >> From: David Gibson <da...@gibson.dropbear.id.au> >> >> Recent (host) kernels support emulating the PAPR defined "XICS" interrupt >> controller system within KVM. This patch allows qemu to initialize and >> configure the in-kernel XICS, and keep its state in sync with qemu's XICS >> state as necessary. >> >> This should give considerable performance improvements. e.g. on a simple >> IPI ping-pong test between hardware threads, using qemu XICS gives us >> around 5,000 irqs/second, whereas the in-kernel XICS gives us around >> 70,000 irqs/s on the same hardware configuration. >> >> [Mike Qiu <qiud...@linux.vnet.ibm.com>: fixed mistype which caused >> ics_set_kvm_state() to fail] >> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> >> [aik: moved to a separate device, reworked QOM] >> >> --- >> Changes: >> 2013/07/17: >> * QOM rework >> >> 2013/07/01 >> * fixed VMState names in order to support xics-kvm migration to xics and >> vice versa >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > > Something went wrong here. ;)
Yeah. git format-patch -s gets confused if commit message has parts separated with '---' :) > [...] >> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c >> new file mode 100644 >> index 0000000..04ce1be >> --- /dev/null >> +++ b/hw/intc/xics_kvm.c > [...] >> +typedef struct XICSStateKVM { >> + struct XICSState parent; > > parent_obj to align with most new devices? > >> + >> + uint32_t set_xive_token; >> + uint32_t get_xive_token; >> + uint32_t int_off_token; >> + uint32_t int_on_token; >> + int kernel_xics_fd; >> +} XICSStateKVM; > [...] >> +static int ics_kvm_realize(DeviceState *dev) >> +{ >> + ICSState *ics = ICS(dev); >> + >> + ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); >> + ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool)); >> + ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs); >> + >> + return 0; >> +} >> + >> +static void ics_kvm_class_init(ObjectClass *klass, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(klass); >> + ICSStateClass *k = ICS_CLASS(klass); >> + >> + dc->init = ics_kvm_realize; > > This is fishy - probably you want to assign to dc->realize and fix the > signature? > > Andreas > >> + dc->reset = ics_kvm_reset; >> + k->pre_save = ics_get_kvm_state; >> + k->post_load = ics_set_kvm_state; >> +} >> + >> +static TypeInfo ics_kvm_info = { > > static const for all TypeInfos. > >> + .name = TYPE_ICS_KVM, >> + .parent = TYPE_ICS, >> + .instance_size = sizeof(ICSState), >> + .class_init = ics_kvm_class_init, >> +}; >> + >> +/* >> + * XICS-KVM >> + */ >> +static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) >> +{ >> + CPUState *cs; >> + ICPState *ss; >> + XICSStateKVM *icpkvm = (XICSStateKVM *) object_dynamic_cast( >> + OBJECT(icp), TYPE_XICS_KVM); >> + XICSStateClass *xics_info = XICS_CLASS(object_class_by_name(TYPE_XICS)); > > Are you intentionally accessing that class by name rather than using > XICS_GET_CLASS(icp), which allows the KVM variant to overwrite things? This is KVM's CPU_setup(). I want to call non-KVM CPU_setup afterwards, i.e. "call parent method". XICS_GET_CLASS will return XICS_KVM class but not XICS, no? >> + >> + if (!icpkvm) { >> + return; >> + } >> + >> + cs = CPU(cpu); >> + ss = &icp->ss[cs->cpu_index]; >> + >> + assert(cs->cpu_index < icp->nr_servers); >> + if (icpkvm->kernel_xics_fd == -1) { >> + abort(); >> + } >> + >> + if (icpkvm->kernel_xics_fd != -1) { >> + int ret; >> + struct kvm_enable_cap xics_enable_cap = { >> + .cap = KVM_CAP_IRQ_XICS, >> + .flags = 0, >> + .args = {icpkvm->kernel_xics_fd, cs->cpu_index, 0, 0}, >> + }; >> + >> + ss->cs = cs; >> + >> + ret = kvm_vcpu_ioctl(ss->cs, KVM_ENABLE_CAP, &xics_enable_cap); >> + if (ret < 0) { >> + fprintf(stderr, "Unable to connect CPU%d to kernel XICS: %s\n", >> + cs->cpu_index, strerror(errno)); > > error_report() in place of all fprintf(stderr, ...)? (without \n then) > >> + exit(1); >> + } >> + } >> + >> + /* Call emulated XICS implementation for consistency */ >> + assert(xics_info); > > If you want to go safe, you could add assert(xics_info->cpu_setup). > >> + xics_info->cpu_setup(icp, cpu); >> +} >> + >> +static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr, >> + uint32_t token, >> + uint32_t nargs, target_ulong args, >> + uint32_t nret, target_ulong rets) >> +{ >> + fprintf(stderr, "pseries: %s must never be called for in-kernel XICS\n", >> + __func__); >> +} >> + >> +static void xics_kvm_realize(DeviceState *dev, Error **errp) >> +{ >> + XICSStateKVM *icpkvm = XICS_KVM(dev); >> + XICSState *icp = XICS(dev); >> + ICSState *ics; >> + QemuOptsList *list = qemu_find_opts("machine"); >> + int i, rc; >> + struct kvm_create_device xics_create_device = { >> + .type = KVM_DEV_TYPE_XICS, >> + .flags = 0, >> + }; >> + >> + if (!kvm_enabled()) { >> + error_setg(errp, "KVM must be enabled for in-kernel XICS"); >> + goto fail; >> + } >> + >> + if (QTAILQ_EMPTY(&list->head) || >> + !qemu_opt_get_bool(QTAILQ_FIRST(&list->head), >> + "kernel_irqchip", true) || > > Is it safe to take a value from the first element in the list? Shouldn't > merging -machine spapr,accel=kvm -machine kernel_irqchip=on be taken > into account? > >> + !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) { >> + error_setg(errp, "KVM must be enabled for in-kernel XICS"); >> + return; >> + } >> + >> + icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", >> rtas_dummy); >> + icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", >> rtas_dummy); >> + icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy); >> + icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy); >> + >> + rc = kvmppc_define_rtas_token(icpkvm->set_xive_token, "ibm,set-xive"); >> + if (rc < 0) { >> + error_setg(errp, "kvmppc_define_rtas_token: ibm,set-xive"); >> + goto fail; >> + } >> + >> + rc = kvmppc_define_rtas_token(icpkvm->get_xive_token, "ibm,get-xive"); >> + if (rc < 0) { >> + error_setg(errp, "kvmppc_define_rtas_token: ibm,get-xive"); >> + goto fail; >> + } >> + >> + rc = kvmppc_define_rtas_token(icpkvm->int_on_token, "ibm,int-on"); >> + if (rc < 0) { >> + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-on"); >> + goto fail; >> + } >> + >> + rc = kvmppc_define_rtas_token(icpkvm->int_off_token, "ibm,int-off"); >> + if (rc < 0) { >> + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-off"); >> + goto fail; >> + } >> + >> + /* Create the kernel ICP */ >> + rc = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &xics_create_device); >> + if (rc < 0) { >> + error_setg_errno(errp, -rc, "Error on KVM_CREATE_DEVICE for XICS"); >> + goto fail; >> + } >> + >> + icpkvm->kernel_xics_fd = xics_create_device.fd; >> + >> + icp->ics = ICS(object_new(TYPE_ICS_KVM)); >> + ics = icp->ics; >> + object_property_add_child(OBJECT(icp), "ics", OBJECT(icp->ics), NULL); >> + >> + ics->nr_irqs = icp->nr_irqs; >> + ics->offset = XICS_IRQ_BASE; >> + ics->icp = icp; >> + qdev_init_nofail(DEVICE(ics)); > > Since this is in a realize function, please use > object_property_set_bool(), which gives you access to the Error - to > catch it you can use a local Error *err variable. > >> + >> + icp->ss = g_malloc0(icp->nr_servers*sizeof(ICPState)); >> + for (i = 0; i < icp->nr_servers; i++) { >> + char buffer[32]; >> + object_initialize(&icp->ss[i], TYPE_ICP_KVM); >> + snprintf(buffer, sizeof(buffer), "icp[%d]", i); >> + object_property_add_child(OBJECT(icp), buffer, OBJECT(&icp->ss[i]), >> NULL); >> + qdev_init_nofail(DEVICE(&icp->ss[i])); > > object_property_set_bool() ? Anthony did XICS refactoring recently and that has qdev_init_nofail(). > Where does icp->nr_servers come from? Via properties in try_create_xics() (hw/ppc/spapr.c). > Is there no way to split this into > instance_init and realize? Why would we want to split? >> + } >> + return; >> + >> +fail: >> + kvmppc_define_rtas_token(0, "ibm,set-xive"); >> + kvmppc_define_rtas_token(0, "ibm,get-xive"); >> + kvmppc_define_rtas_token(0, "ibm,int-on"); >> + kvmppc_define_rtas_token(0, "ibm,int-off"); >> + return; >> +} >> + >> +static void xics_kvm_class_init(ObjectClass *oc, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(oc); >> + XICSStateClass *k = XICS_CLASS(oc); > > xsc? Whyyyy? Many places keep using "k". >> + >> + dc->realize = xics_kvm_realize; >> + k->cpu_setup = xics_kvm_cpu_setup; >> +} >> + >> +static const TypeInfo xics_kvm_info = { >> + .name = TYPE_XICS_KVM, >> + .parent = TYPE_XICS, >> + .instance_size = sizeof(XICSStateKVM), >> + .class_init = xics_kvm_class_init, >> +}; >> + >> +static void xics_kvm_register_types(void) >> +{ >> + type_register_static(&xics_kvm_info); >> + type_register_static(&ics_kvm_info); >> + type_register_static(&icp_kvm_info); >> +} >> + >> +type_init(xics_kvm_register_types) >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 432f0d2..84433ee 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -148,7 +148,31 @@ static XICSState *xics_system_init(int nr_servers, int >> nr_irqs) >> { >> XICSState *icp = NULL; >> >> - icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs); >> + if (kvm_enabled()) { >> + bool irqchip_allowed = true, irqchip_required = false; >> + QemuOptsList *list = qemu_find_opts("machine"); >> + >> + if (!QTAILQ_EMPTY(&list->head)) { >> + irqchip_allowed = qemu_opt_get_bool(QTAILQ_FIRST(&list->head), >> + "kernel_irqchip", true); >> + irqchip_required = qemu_opt_get_bool(QTAILQ_FIRST(&list->head), >> + "kernel_irqchip", false); >> + } > > Again, don't we have APIs to help with opts access? Hm, I copied this from openpic but it went further since then :) >> + >> + if (irqchip_allowed) { >> + icp = try_create_xics(TYPE_XICS_KVM, nr_servers, nr_irqs); >> + } >> + >> + if (irqchip_required && !icp) { >> + perror("iFailed to create in-kernel XICS\n"); > > "Failed to ..."? vim... -- Alexey