Re: [PATCH 2/3] KVM: Emulate MSI-X table in kernel
On Thursday 03 February 2011 09:05:55 Marcelo Tosatti wrote:
> On Sun, Jan 30, 2011 at 01:11:15PM +0800, Sheng Yang wrote:
> > Then we can support mask bit operation of assigned devices now.
> >
> > Signed-off-by: Sheng Yang sh...@linux.intel.com
> >
> > +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
> > +                                    struct kvm_msix_mmio_user *mmio_user)
> > +{
> > +	struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
> > +	struct kvm_msix_mmio *mmio = NULL;
> > +	int r = 0, i;
> > +
> > +	mutex_lock(&mmio_dev->lock);
> > +	for (i = 0; i < mmio_dev->mmio_nr; i++) {
> > +		if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id &&
> > +		    (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
> > +		    (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
> > +			mmio = &mmio_dev->mmio[i];
> > +			if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
> > +				r = -EINVAL;
> > +				goto out;
> > +			}
> > +			break;
> > +		}
>
> Why allow this ioctl to succeed if there's an entry already present?
> This case is broken as mmio_dev->mmio_nr is increased below.

Oh, it's a bug to let mmio_nr increase even when the MMIO entry was found. I've fixed it. The reason we allow multiple calls is that userspace can register different types of address here (table address and PBA address).

> PCI bits must be reviewed...

Pardon? PCI related things are already in 2.6.38-rc.

-- regards Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4 v9] MSI-X MMIO support for KVM
Sorry for the long delay, I have just come back from vacation... Changes from v8: 1. Update struct kvm_run to contain MSI-X routing update exit specific information. 2. Fix a mmio_nr counting bug. Note that this patchset is still based on 2.6.37, due to a blocking bug affecting assigned devices in upstream at the moment. Sheng Yang (4): KVM: Move struct kvm_io_device to kvm_host.h KVM: Add kvm_io_ext_data to IO handler KVM: Emulate MSI-X table in kernel KVM: Add documents for MSI-X MMIO API Documentation/kvm/api.txt | 58 + arch/x86/kvm/Makefile |2 +- arch/x86/kvm/i8254.c |6 +- arch/x86/kvm/i8259.c |3 +- arch/x86/kvm/lapic.c |3 +- arch/x86/kvm/x86.c| 40 +-- include/linux/kvm.h | 28 + include/linux/kvm_host.h | 65 ++- virt/kvm/assigned-dev.c | 44 +++ virt/kvm/coalesced_mmio.c |3 +- virt/kvm/eventfd.c|2 +- virt/kvm/ioapic.c |2 +- virt/kvm/iodev.h | 31 + virt/kvm/kvm_main.c | 40 ++- virt/kvm/msix_mmio.c | 293 + virt/kvm/msix_mmio.h | 25 16 files changed, 594 insertions(+), 51 deletions(-) create mode 100644 virt/kvm/msix_mmio.c create mode 100644 virt/kvm/msix_mmio.h -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
Then it can be used by other struct in kvm_host.h Signed-off-by: Sheng Yang sh...@linux.intel.com --- include/linux/kvm_host.h | 23 +++ virt/kvm/iodev.h | 25 + 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5021db..7d313e0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +struct kvm_io_device; + +/** + * kvm_io_device_ops are called under kvm slots_lock. + * read and write handlers return 0 if the transaction has been handled, + * or non-zero to have it passed to the next device. + **/ +struct kvm_io_device_ops { + int (*read)(struct kvm_io_device *this, + gpa_t addr, + int len, + void *val); + int (*write)(struct kvm_io_device *this, +gpa_t addr, +int len, +const void *val); + void (*destructor)(struct kvm_io_device *this); +}; + +struct kvm_io_device { + const struct kvm_io_device_ops *ops; +}; + struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index 12fd3ca..d1f5651 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h @@ -17,32 +17,9 @@ #define __KVM_IODEV_H__ #include linux/kvm_types.h +#include linux/kvm_host.h #include asm/errno.h -struct kvm_io_device; - -/** - * kvm_io_device_ops are called under kvm slots_lock. - * read and write handlers return 0 if the transaction has been handled, - * or non-zero to have it passed to the next device. 
- **/ -struct kvm_io_device_ops { - int (*read)(struct kvm_io_device *this, - gpa_t addr, - int len, - void *val); - int (*write)(struct kvm_io_device *this, -gpa_t addr, -int len, -const void *val); - void (*destructor)(struct kvm_io_device *this); -}; - - -struct kvm_io_device { - const struct kvm_io_device_ops *ops; -}; - static inline void kvm_iodevice_init(struct kvm_io_device *dev, const struct kvm_io_device_ops *ops) { -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler
Add a new parameter to IO writing handler, so that we can transfer information from IO handler to caller. Signed-off-by: Sheng Yang sh...@linux.intel.com --- arch/x86/kvm/i8254.c |6 -- arch/x86/kvm/i8259.c |3 ++- arch/x86/kvm/lapic.c |3 ++- arch/x86/kvm/x86.c| 13 - include/linux/kvm_host.h | 12 ++-- virt/kvm/coalesced_mmio.c |3 ++- virt/kvm/eventfd.c|2 +- virt/kvm/ioapic.c |2 +- virt/kvm/iodev.h |6 -- virt/kvm/kvm_main.c |4 ++-- 10 files changed, 36 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index efad723..bd8f0c5 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -439,7 +439,8 @@ static inline int pit_in_range(gpa_t addr) } static int pit_ioport_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *data) + gpa_t addr, int len, const void *data, + struct kvm_io_ext_data *ext_data) { struct kvm_pit *pit = dev_to_pit(this); struct kvm_kpit_state *pit_state = pit-pit_state; @@ -585,7 +586,8 @@ static int pit_ioport_read(struct kvm_io_device *this, } static int speaker_ioport_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *data) + gpa_t addr, int len, const void *data, + struct kvm_io_ext_data *ext_data) { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = pit-pit_state; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 3cece05..96b1070 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -480,7 +480,8 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev) } static int picdev_write(struct kvm_io_device *this, -gpa_t addr, int len, const void *val) +gpa_t addr, int len, const void *val, +struct kvm_io_ext_data *ext_data) { struct kvm_pic *s = to_pic(this); unsigned char data = *(unsigned char *)val; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93cf9d0..f413e9c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -836,7 +836,8 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, 
u32 val) } static int apic_mmio_write(struct kvm_io_device *this, - gpa_t address, int len, const void *data) + gpa_t address, int len, const void *data, + struct kvm_io_ext_data *ext_data) { struct kvm_lapic *apic = to_lapic(this); unsigned int offset = address - apic-base_address; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fa708c9..21b84e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3571,13 +3571,14 @@ static void kvm_init_msr_list(void) } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, - const void *v) + const void *v, struct kvm_io_ext_data *ext_data) { if (vcpu-arch.apic - !kvm_iodevice_write(vcpu-arch.apic-dev, addr, len, v)) + !kvm_iodevice_write(vcpu-arch.apic-dev, addr, len, v, ext_data)) return 0; - return kvm_io_bus_write(vcpu-kvm, KVM_MMIO_BUS, addr, len, v); + return kvm_io_bus_write(vcpu-kvm, KVM_MMIO_BUS, + addr, len, v, ext_data); } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) @@ -3807,6 +3808,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_vcpu *vcpu) { gpa_t gpa; + struct kvm_io_ext_data ext_data; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); @@ -3825,7 +3827,7 @@ mmio: /* * Is this MMIO handled locally? */ - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) + if (!vcpu_mmio_write(vcpu, gpa, bytes, val, ext_data)) return X86EMUL_CONTINUE; vcpu-mmio_needed = 1; @@ -3940,6 +3942,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) { /* TODO: String I/O for in kernel device */ int r; + struct kvm_io_ext_data ext_data; if (vcpu-arch.pio.in) r = kvm_io_bus_read(vcpu-kvm, KVM_PIO_BUS, vcpu-arch.pio.port, @@ -3947,7 +3950,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) else r = kvm_io_bus_write(vcpu-kvm, KVM_PIO_BUS, vcpu-arch.pio.port, vcpu-arch.pio.size, -pd); +pd, ext_data); return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7d313e0..6bb211d 100644
[PATCH 4/4] KVM: Add documents for MSI-X MMIO API
Signed-off-by: Sheng Yang sh...@linux.intel.com --- Documentation/kvm/api.txt | 58 + 1 files changed, 58 insertions(+), 0 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index e1a9297..dd10c3b 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1263,6 +1263,53 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +4.54 KVM_REGISTER_MSIX_MMIO + +Capability: KVM_CAP_MSIX_MMIO +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_msix_mmio_user (in) +Returns: 0 on success, -1 on error + +This API indicates an MSI-X MMIO address of a guest device. Then all MMIO +operation would be handled by kernel. When necessary (e.g. MSI data/address +changed), KVM would exit to userspace using KVM_EXIT_MSIX_ROUTING_UPDATE to +indicate the MMIO modification and require userspace to update IRQ routing +table. + +NOTICE: Writing the MSI-X MMIO page after it was registered with this API may +be dangerous for userspace program. The writing during VM running may result +in synchronization issue therefore the assigned device can't work properly. +The writing is allowed when VM is not running and can be used as save/restore +mechanism. + +struct kvm_msix_mmio_user { + __u32 dev_id; + __u16 type; /* Device type and MMIO address type */ + __u16 max_entries_nr; /* Maximum entries supported */ + __u64 base_addr;/* Guest physical address of MMIO */ + __u64 base_va; /* Host virtual address of MMIO mapping */ + __u64 flags;/* Reserved for now */ + __u64 reserved[4]; +}; + +Current device type can be: +#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV (1 << 0) + +Current MMIO type can be: +#define KVM_MSIX_MMIO_TYPE_BASE_TABLE (1 << 8) + +4.55 KVM_UNREGISTER_MSIX_MMIO + +Capability: KVM_CAP_MSIX_MMIO +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_msix_mmio_user (in) +Returns: 0 on success, -1 on error + +This API would unregister the specific MSI-X MMIO, indicated by dev_id and +type fields of struct kvm_msix_mmio_user. + 5. 
The kvm_run structure Application code obtains a pointer to the kvm_run structure by @@ -1445,6 +1492,17 @@ Userspace can now handle the hypercall and when it's done modify the gprs as necessary. Upon guest entry all guest GPRs will then be replaced by the values in this struct. + /* KVM_EXIT_MSIX_ROUTING_UPDATE*/ + struct { + __u32 dev_id; + __u16 type; + __u16 entry_idx; + __u64 flags; + } msix_routing; + +KVM_EXIT_MSIX_ROUTING_UPDATE indicates one MSI-X entry has been modified, and +userspace need to update the correlated routing table. + /* Fix the size of the union. */ char padding[256]; }; -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4 v9] qemu-kvm: MSI-X MMIO support for assigned device
Update with kernel patches v9. Sheng Yang (4): qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit qemu-kvm: Ioctl for MSIX MMIO support qemu-kvm: Header file update for MSI-X MMIO support qemu-kvm: MSI-X MMIO support for assigned device hw/device-assignment.c | 284 +-- hw/device-assignment.h |5 +- kvm/include/linux/kvm.h | 28 + qemu-kvm.c | 60 ++ qemu-kvm.h | 26 + 5 files changed, 366 insertions(+), 37 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device
Signed-off-by: Sheng Yang sh...@linux.intel.com --- hw/device-assignment.c | 106 +-- hw/device-assignment.h |3 + qemu-kvm.c | 46 + qemu-kvm.h | 19 + 4 files changed, 160 insertions(+), 14 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 5c162c4..09e3b99 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -71,6 +71,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, uint32_t address, int len); +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) +{ +return (uint32_t)seg 16 | (uint32_t)bus 8 | (uint32_t)devfn; +} + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -274,6 +279,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, AssignedDevRegion *region = r_dev-v_addrs[region_num]; PCIRegion *real_region = r_dev-real_device.regions[region_num]; int ret = 0; +#ifdef KVM_CAP_MSIX_MMIO +int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO); +struct kvm_msix_mmio_user msix_mmio; +#endif DEBUG(e_phys=%08 FMT_PCIBUS r_virt=%p type=%d len=%08 FMT_PCIBUS region_num=%d \n, e_phys, region-u.r_virtbase, type, e_size, region_num); @@ -292,6 +301,23 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, cpu_register_physical_memory(e_phys + offset, TARGET_PAGE_SIZE, r_dev-mmio_index); +#ifdef KVM_CAP_MSIX_MMIO +if (cap_mask) { +r_dev-guest_msix_table_addr = e_phys + offset; +memset(msix_mmio, 0, sizeof msix_mmio); +msix_mmio.dev_id = calc_assigned_dev_id(r_dev-h_segnr, +r_dev-h_busnr, r_dev-h_devfn); +msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV | + KVM_MSIX_MMIO_TYPE_BASE_TABLE; +msix_mmio.base_addr = e_phys + offset; +msix_mmio.base_va = (unsigned long)r_dev-msix_table_page; +msix_mmio.max_entries_nr = r_dev-max_msix_entries_nr; +msix_mmio.flags = 0; +ret = kvm_register_msix_mmio(kvm_context, 
msix_mmio); +if (ret) +fprintf(stderr, fail to register in-kernel msix_mmio!\n); +} +#endif } } @@ -854,11 +880,6 @@ static void free_assigned_device(AssignedDevice *dev) } } -static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) -{ -return (uint32_t)seg 16 | (uint32_t)bus 8 | (uint32_t)devfn; -} - static void assign_failed_examine(AssignedDevice *dev) { char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; @@ -1268,6 +1289,9 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, return r; } +static int assigned_dev_update_routing_handler(void *opaque, +struct kvm_msix_routing_data *data); + static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) { struct kvm_assigned_irq assigned_irq_data; @@ -1494,7 +1518,9 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) msix_table_entry = pci_get_long(pci_dev-config + pos + PCI_MSIX_TABLE); bar_nr = msix_table_entry PCI_MSIX_BIR; msix_table_entry = ~PCI_MSIX_BIR; -dev-msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; +dev-msix_table_addr = pci_region[bar_nr].base_addr + + msix_table_entry; + dev-max_msix_entries_nr = get_msix_entries_max_nr(dev); } #endif @@ -1678,11 +1704,10 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) (8 * (addr 3))) 0x; } -static void msix_mmio_writel(void *opaque, - target_phys_addr_t addr, uint32_t val) +static void assigned_dev_update_routing(void *opaque, +struct kvm_msix_routing_data *data) { AssignedDevice *adev = opaque; -unsigned int offset = addr 0xfff; void *page = adev-msix_table_page; int ctrl_word, index; struct kvm_irq_routing_entry new_entry = {}; @@ -1691,11 +1716,7 @@ static void msix_mmio_writel(void *opaque, struct PCIDevice *pci_dev = adev-dev; uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); -DEBUG(write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n, - addr, val); -memcpy((void *)((char *)page + offset), val, 4); - -index = offset / 16; +index = 
data-entry_idx; /* Check if mask bit is being accessed */ memcpy(msg_addr, (char *)page + index * 16, 4);
[PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support
Signed-off-by: Sheng Yang sh...@linux.intel.com --- qemu-kvm.c | 14 ++ qemu-kvm.h |7 +++ 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 49cd683..d282c95 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1050,6 +1050,20 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm, } #endif +#ifdef KVM_CAP_MSIX_MMIO +int kvm_register_msix_mmio(kvm_context_t kvm, + struct kvm_msix_mmio_user *mmio_user) +{ +return kvm_vm_ioctl(kvm_state, KVM_REGISTER_MSIX_MMIO, mmio_user); +} + +int kvm_unregister_msix_mmio(kvm_context_t kvm, + struct kvm_msix_mmio_user *mmio_user) +{ +return kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_MSIX_MMIO, mmio_user); +} +#endif + #if defined(KVM_CAP_IRQFD) defined(CONFIG_EVENTFD) #include sys/eventfd.h diff --git a/qemu-kvm.h b/qemu-kvm.h index 88cf276..48ff52d 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -602,6 +602,13 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm, struct kvm_assigned_msix_entry *entry); #endif +#ifdef KVM_CAP_MSIX_MMIO +int kvm_register_msix_mmio(kvm_context_t kvm, + struct kvm_msix_mmio_user *mmio_user); +int kvm_unregister_msix_mmio(kvm_context_t kvm, + struct kvm_msix_mmio_user *mmio_user); +#endif + #else /* !CONFIG_KVM */ typedef struct kvm_context *kvm_context_t; -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] qemu-kvm: Header file update for MSI-X MMIO support
Signed-off-by: Sheng Yang sh...@linux.intel.com --- kvm/include/linux/kvm.h | 28 1 files changed, 28 insertions(+), 0 deletions(-) diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index e46729e..dcb8f54 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -161,6 +161,7 @@ struct kvm_pit_config { #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 +#define KVM_EXIT_MSIX_ROUTING_UPDATE 19 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -264,6 +265,13 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + /* KVM_EXIT_MSIX_ROUTING_UPDATE*/ + struct { + __u32 dev_id; + __u16 type; + __u16 entry_idx; + __u64 flags; + } msix_routing; /* Fix the size of the union. */ char padding[256]; }; @@ -530,6 +538,7 @@ struct kvm_enable_cap { #ifdef __KVM_HAVE_XCRS #define KVM_CAP_XCRS 56 #endif +#define KVM_CAP_MSIX_MMIO 60 #ifdef KVM_CAP_IRQ_ROUTING @@ -660,6 +669,9 @@ struct kvm_clock_data { #define KVM_XEN_HVM_CONFIG_IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) +/* Available with KVM_CAP_MSIX_MMIO */ +#define KVM_REGISTER_MSIX_MMIO_IOW(KVMIO, 0x7d, struct kvm_msix_mmio_user) +#define KVM_UNREGISTER_MSIX_MMIO _IOW(KVMIO, 0x7e, struct kvm_msix_mmio_user) /* Available with KVM_CAP_PIT_STATE2 */ #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) @@ -781,4 +793,20 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV(1 0) + +#define KVM_MSIX_MMIO_TYPE_BASE_TABLE (1 8) + +#define KVM_MSIX_MMIO_TYPE_DEV_MASK0x00ff +#define KVM_MSIX_MMIO_TYPE_BASE_MASK 0xff00 +struct kvm_msix_mmio_user { + __u32 dev_id; + __u16 type; + __u16 max_entries_nr; + __u64 base_addr; + __u64 base_va; + __u64 flags; + __u64 reserved[4]; +}; + #endif /* __LINUX_KVM_H */ -- 1.7.0.1 -- To 
unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
The old MSI-X enabling method assume the entries are written before MSI-X enabled, but some OS didn't obey this, e.g. FreeBSD. This patch would fix this. Also, according to the PCI spec, mask bit of MSI-X table should be set after reset. Signed-off-by: Sheng Yang sh...@linux.intel.com --- hw/device-assignment.c | 188 +--- hw/device-assignment.h |2 +- 2 files changed, 162 insertions(+), 28 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index e5205cf..5c162c4 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -1146,15 +1146,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos) #endif #ifdef KVM_CAP_DEVICE_MSIX -static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) + +#define PCI_MSIX_CTRL_MASKBIT 1ul +static int get_msix_entries_max_nr(AssignedDevice *adev) { -AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); -uint16_t entries_nr = 0, entries_max_nr; -int pos = 0, i, r = 0; -uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl; -struct kvm_assigned_msix_nr msix_nr; -struct kvm_assigned_msix_entry msix_entry; -void *va = adev-msix_table_page; +int pos, entries_max_nr; +PCIDevice *pci_dev = adev-dev; pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX); @@ -1162,20 +1159,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) entries_max_nr = PCI_MSIX_TABSIZE; entries_max_nr += 1; +return entries_max_nr; +} + +static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry) +{ +uint32_t msg_ctrl; +void *va = adev-msix_table_page; + +memcpy(msg_ctrl, va + entry * 16 + 12, 4); +return (msg_ctrl PCI_MSIX_CTRL_MASKBIT); +} + +static int get_msix_valid_entries_nr(AssignedDevice *adev, +uint16_t entries_max_nr) +{ +void *va = adev-msix_table_page; +uint32_t msg_ctrl; +uint16_t entries_nr = 0; +int i; + /* Get the usable entry number for allocating */ for (i = 0; i entries_max_nr; i++) { memcpy(msg_ctrl, va + i * 16 + 12, 4); -memcpy(msg_data, va + i * 16 + 
8, 4); /* Ignore unused entry even it's unmasked */ -if (msg_data == 0) +if (assigned_dev_msix_entry_masked(adev, i)) continue; entries_nr ++; } +return entries_nr; +} + +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev, + uint16_t entries_nr, + uint16_t entries_max_nr) +{ +AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); +int i, r = 0; +uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl; +struct kvm_assigned_msix_nr msix_nr; +struct kvm_assigned_msix_entry msix_entry; +void *va = adev-msix_table_page; -if (entries_nr == 0) { -fprintf(stderr, MSI-X entry number is zero!\n); -return -EINVAL; -} msix_nr.assigned_dev_id = calc_assigned_dev_id(adev-h_segnr, adev-h_busnr, (uint8_t)adev-h_devfn); msix_nr.entry_nr = entries_nr; @@ -1187,6 +1212,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) } free_dev_irq_entries(adev); +memset(pci_dev-msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV * +sizeof(*pci_dev-msix_entry_used)); adev-irq_entries_nr = entries_nr; adev-entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry)); if (!adev-entry) { @@ -1200,10 +1227,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) if (entries_nr = msix_nr.entry_nr) break; memcpy(msg_ctrl, va + i * 16 + 12, 4); -memcpy(msg_data, va + i * 16 + 8, 4); -if (msg_data == 0) +if (assigned_dev_msix_entry_masked(adev, i)) continue; +memcpy(msg_data, va + i * 16 + 8, 4); memcpy(msg_addr, va + i * 16, 4); memcpy(msg_upper_addr, va + i * 16 + 4, 4); @@ -1217,17 +1244,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) adev-entry[entries_nr].u.msi.address_lo = msg_addr; adev-entry[entries_nr].u.msi.address_hi = msg_upper_addr; adev-entry[entries_nr].u.msi.data = msg_data; -DEBUG(MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!, msg_data, msg_addr); - kvm_add_routing_entry(adev-entry[entries_nr]); +DEBUG(MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n, msg_data, msg_addr); +kvm_add_routing_entry(adev-entry[entries_nr]); msix_entry.gsi = 
adev-entry[entries_nr].gsi; msix_entry.entry = i; +pci_dev-msix_entry_used[i] = 1; r = kvm_assign_set_msix_entry(kvm_context, msix_entry); if (r) { fprintf(stderr, fail to set MSI-X entry! %s\n, strerror(-r)); break; } -DEBUG(MSI-X
[PATCH v2 06/15] Synchronize VCPU states before reset
This is required to support keeping VCPU states across a system reset. If we do not read the current state before the reset, cpu_synchronize_all_post_reset may write back incorrect state information. The first user of this will be MCE MSR synchronization which currently works around the missing cpu_synchronize_all_states. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- vl.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/vl.c b/vl.c index b436952..7751843 100644 --- a/vl.c +++ b/vl.c @@ -1452,6 +1452,7 @@ static void main_loop(void) } if (qemu_reset_requested()) { pause_all_vcpus(); +cpu_synchronize_all_states(); qemu_system_reset(); resume_all_vcpus(); } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 08/15] kvm: Rename kvm_arch_process_irqchip_events to async_events
We will broaden the scope of this function on x86 beyond irqchip events. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- kvm-all.c |2 +- kvm.h |2 +- target-i386/kvm.c |2 +- target-ppc/kvm.c |2 +- target-s390x/kvm.c |2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index e6a7de4..6522a32 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -893,7 +893,7 @@ int kvm_cpu_exec(CPUState *env) DPRINTF(kvm_cpu_exec()\n); -if (kvm_arch_process_irqchip_events(env)) { +if (kvm_arch_process_async_events(env)) { env-exit_request = 0; return EXCP_HLT; } diff --git a/kvm.h b/kvm.h index 59b2c29..7bc04e0 100644 --- a/kvm.h +++ b/kvm.h @@ -102,7 +102,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); -int kvm_arch_process_irqchip_events(CPUState *env); +int kvm_arch_process_async_events(CPUState *env); int kvm_arch_get_registers(CPUState *env); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index f909661..a416554 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1675,7 +1675,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run) cpu_set_apic_base(env-apic_state, run-apic_base); } -int kvm_arch_process_irqchip_events(CPUState *env) +int kvm_arch_process_async_events(CPUState *env) { if (kvm_irqchip_in_kernel()) { return 0; diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 3924f4b..6c99a16 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -259,7 +259,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run) { } -int kvm_arch_process_irqchip_events(CPUState *env) +int kvm_arch_process_async_events(CPUState *env) { return 0; } diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index b349812..5673a95 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -177,7 +177,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run) { } -int kvm_arch_process_irqchip_events(CPUState *env) +int kvm_arch_process_async_events(CPUState *env) 
{ return 0; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 14/15] Add qemu_ram_remap
From: Huang Ying ying.hu...@intel.com qemu_ram_remap() unmaps the specified RAM pages, then re-maps these pages again. This is used by KVM HWPoison support to clear HWPoisoned page tables across guest rebooting, so that a new page may be allocated later to recover the memory error. [ Jan: style fixlets, WIN32 fix ] Signed-off-by: Huang Ying ying.hu...@intel.com Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- cpu-all.h|4 +++ cpu-common.h |1 + exec.c | 63 +- 3 files changed, 67 insertions(+), 1 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index caf5e6c..4f4631d 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -863,10 +863,14 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr); extern int phys_ram_fd; extern ram_addr_t ram_size; +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC_MASK (1 0) + typedef struct RAMBlock { uint8_t *host; ram_addr_t offset; ram_addr_t length; +uint32_t flags; char idstr[256]; QLIST_ENTRY(RAMBlock) next; #if defined(__linux__) !defined(TARGET_S390X) diff --git a/cpu-common.h b/cpu-common.h index 54d21d4..ef4e8da 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -50,6 +50,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, ram_addr_t size, void *host); ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size); void qemu_ram_free(ram_addr_t addr); +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should only be used for ram local to a device. 
*/ void *qemu_get_ram_ptr(ram_addr_t addr); /* Same but slower, to use for migration, where the order of diff --git a/exec.c b/exec.c index d611100..9308a97 100644 --- a/exec.c +++ b/exec.c @@ -2867,6 +2867,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, if (host) { new_block-host = host; +new_block-flags |= RAM_PREALLOC_MASK; } else { if (mem_path) { #if defined (__linux__) !defined(TARGET_S390X) @@ -2920,7 +2921,9 @@ void qemu_ram_free(ram_addr_t addr) QLIST_FOREACH(block, ram_list.blocks, next) { if (addr == block-offset) { QLIST_REMOVE(block, next); -if (mem_path) { +if (block-flags RAM_PREALLOC_MASK) { +; +} else if (mem_path) { #if defined (__linux__) !defined(TARGET_S390X) if (block-fd) { munmap(block-host, block-length); @@ -2943,6 +2946,64 @@ void qemu_ram_free(ram_addr_t addr) } +#ifndef _WIN32 +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +{ +RAMBlock *block; +ram_addr_t offset; +int flags; +void *area, *vaddr; + +QLIST_FOREACH(block, ram_list.blocks, next) { +offset = addr - block-offset; +if (offset block-length) { +vaddr = block-host + offset; +if (block-flags RAM_PREALLOC_MASK) { +; +} else { +flags = MAP_FIXED; +munmap(vaddr, length); +if (mem_path) { +#if defined(__linux__) !defined(TARGET_S390X) +if (block-fd) { +#ifdef MAP_POPULATE +flags |= mem_prealloc ? 
MAP_POPULATE | MAP_SHARED : +MAP_PRIVATE; +#else +flags |= MAP_PRIVATE; +#endif +area = mmap(vaddr, length, PROT_READ | PROT_WRITE, +flags, block-fd, offset); +} else { +flags |= MAP_PRIVATE | MAP_ANONYMOUS; +area = mmap(vaddr, length, PROT_READ | PROT_WRITE, +flags, -1, 0); +} +#endif +} else { +#if defined(TARGET_S390X) defined(CONFIG_KVM) +flags |= MAP_SHARED | MAP_ANONYMOUS; +area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE, +flags, -1, 0); +#else +flags |= MAP_PRIVATE | MAP_ANONYMOUS; +area = mmap(vaddr, length, PROT_READ | PROT_WRITE, +flags, -1, 0); +#endif +} +if (area != vaddr) { +fprintf(stderr, Could not remap addr: %lx@%lx\n, +length, addr); +exit(1); +} +qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE); +} +return; +} +} +} +#endif /* !_WIN32 */ + /* Return a host pointer to ram allocated with qemu_ram_alloc. With the exception of the softmmu code in this file, this should only be used for local memory (e.g. video ram) that the device owns, -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a
[PATCH v2 15/15] KVM, MCE, unpoison memory address across reboot
From: Huang Ying ying.hu...@intel.com In the Linux kernel's HWPoison processing implementation, the virtual address in each process that maps the faulty physical memory page is marked as HWPoison, so that any further access to that virtual address kills the corresponding process with SIGBUS. If the faulty physical memory page is used by a KVM guest, the SIGBUS is sent to QEMU, and QEMU simulates an MCE to report the memory error to the guest OS. If the guest OS cannot recover from the error (for example, because the page is accessed by kernel code), the guest OS reboots the system. But because the underlying host virtual address backing the guest physical memory is still poisoned, if the guest system accesses the corresponding guest physical memory even after rebooting, the SIGBUS is still sent to QEMU and an MCE is simulated again. That is, the guest system cannot recover by rebooting. In fact, the contents of the guest physical memory page need not be kept across a reboot, so we can allocate a new host physical page to back the corresponding guest physical address. This patch fixes the issue in QEMU-KVM by calling qemu_ram_remap() to clear the corresponding page table entry, which makes it possible to allocate a new page and recover from the error.
[ Jan: rebasing and tiny cleanups] Signed-off-by: Huang Ying ying.hu...@intel.com Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- target-i386/kvm.c | 36 1 files changed, 36 insertions(+), 0 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 44e5504..7b7105d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -173,7 +173,40 @@ static int get_para_features(CPUState *env) } #endif /* CONFIG_KVM_PARA */ +typedef struct HWPoisonPage { +ram_addr_t ram_addr; +QLIST_ENTRY(HWPoisonPage) list; +} HWPoisonPage; + +static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list = +QLIST_HEAD_INITIALIZER(hwpoison_page_list); + +static void kvm_unpoison_all(void *param) +{ +HWPoisonPage *page, *next_page; + +QLIST_FOREACH_SAFE(page, hwpoison_page_list, list, next_page) { +QLIST_REMOVE(page, list); +qemu_ram_remap(page-ram_addr, TARGET_PAGE_SIZE); +qemu_free(page); +} +} + #ifdef KVM_CAP_MCE +static void kvm_hwpoison_page_add(ram_addr_t ram_addr) +{ +HWPoisonPage *page; + +QLIST_FOREACH(page, hwpoison_page_list, list) { +if (page-ram_addr == ram_addr) { +return; +} +} +page = qemu_malloc(sizeof(HWPoisonPage)); +page-ram_addr = ram_addr; +QLIST_INSERT_HEAD(hwpoison_page_list, page, list); +} + static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, int *max_banks) { @@ -233,6 +266,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) hardware_memory_error(); } } +kvm_hwpoison_page_add(ram_addr); kvm_mce_inject(env, paddr, code); } else #endif /* KVM_CAP_MCE */ @@ -263,6 +297,7 @@ int kvm_arch_on_sigbus(int code, void *addr) QEMU itself instead of guest system!: %p\n, addr); return 0; } +kvm_hwpoison_page_add(ram_addr); kvm_mce_inject(first_cpu, paddr, code); } else #endif /* KVM_CAP_MCE */ @@ -571,6 +606,7 @@ int kvm_arch_init(KVMState *s) fprintf(stderr, e820_add_entry() table is full\n); return ret; } +qemu_register_reset(kvm_unpoison_all, NULL); return 0; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm 
in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 13/15] kvm: x86: Fail kvm_arch_init_vcpu if MCE initialization fails
There is no reason to continue if the kernel claims to support MCE but then fails to process our request. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/kvm.c | 30 +- 1 files changed, 17 insertions(+), 13 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 486efb9..44e5504 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -437,20 +437,24 @@ int kvm_arch_init_vcpu(CPUState *env) int banks; int ret; -if (kvm_get_mce_cap_supported(env-kvm_state, mcg_cap, banks)) { -perror(kvm_get_mce_cap_supported FAILED); -} else { -if (banks MCE_BANKS_DEF) -banks = MCE_BANKS_DEF; -mcg_cap = MCE_CAP_DEF; -mcg_cap |= banks; -ret = kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); -if (ret 0) { -fprintf(stderr, KVM_X86_SETUP_MCE: %s, strerror(-ret)); -} else { -env-mcg_cap = mcg_cap; -} +ret = kvm_get_mce_cap_supported(env-kvm_state, mcg_cap, banks); +if (ret 0) { +fprintf(stderr, kvm_get_mce_cap_supported: %s, strerror(-ret)); +return ret; } + +if (banks MCE_BANKS_DEF) { +banks = MCE_BANKS_DEF; +} +mcg_cap = MCE_CAP_DEF; +mcg_cap |= banks; +ret = kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); +if (ret 0) { +fprintf(stderr, KVM_X86_SETUP_MCE: %s, strerror(-ret)); +return ret; +} + +env-mcg_cap = mcg_cap; } #endif -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 10/15] x86: Run qemu_inject_x86_mce on target VCPU
We will use the current TCG-only MCE injection path for KVM as well, and then this read-modify-write of the target VCPU state has to be performed synchronously in the corresponding thread. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- target-i386/helper.c | 87 + 1 files changed, 58 insertions(+), 29 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index e3ef40c..a32960c 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1067,29 +1067,42 @@ static void breakpoint_handler(CPUState *env) prev_debug_excp_handler(env); } -static void -qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, -uint64_t mcg_status, uint64_t addr, uint64_t misc, -int flags) +typedef struct MCEInjectionParams { +Monitor *mon; +CPUState *env; +int bank; +uint64_t status; +uint64_t mcg_status; +uint64_t addr; +uint64_t misc; +int flags; +} MCEInjectionParams; + +static void do_inject_x86_mce(void *data) { -uint64_t mcg_cap = cenv-mcg_cap; -uint64_t *banks = cenv-mce_banks + 4 * bank; +MCEInjectionParams *params = data; +CPUState *cenv = params-env; +uint64_t *banks = cenv-mce_banks + 4 * params-bank; + +cpu_synchronize_state(cenv); /* * If there is an MCE exception being processed, ignore this SRAO MCE * unless unconditional injection was requested. 
*/ -if (!(flags MCE_INJECT_UNCOND_AO) !(status MCI_STATUS_AR) +if (!(params-flags MCE_INJECT_UNCOND_AO) + !(params-status MCI_STATUS_AR) (cenv-mcg_status MCG_STATUS_MCIP)) { return; } -if (status MCI_STATUS_UC) { + +if (params-status MCI_STATUS_UC) { /* * if MSR_MCG_CTL is not all 1s, the uncorrected error * reporting is disabled */ -if ((mcg_cap MCG_CTL_P) cenv-mcg_ctl != ~(uint64_t)0) { -monitor_printf(mon, +if ((cenv-mcg_cap MCG_CTL_P) cenv-mcg_ctl != ~(uint64_t)0) { +monitor_printf(params-mon, CPU %d: Uncorrected error reporting disabled\n, cenv-cpu_index); return; @@ -1100,35 +1113,39 @@ qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, * reporting is disabled for the bank */ if (banks[0] != ~(uint64_t)0) { -monitor_printf(mon, CPU %d: Uncorrected error reporting disabled - for bank %d\n, cenv-cpu_index, bank); +monitor_printf(params-mon, + CPU %d: Uncorrected error reporting disabled for +bank %d\n, + cenv-cpu_index, params-bank); return; } if ((cenv-mcg_status MCG_STATUS_MCIP) || !(cenv-cr[4] CR4_MCE_MASK)) { -monitor_printf(mon, CPU %d: Previous MCE still in progress, -raising triple fault\n, cenv-cpu_index); +monitor_printf(params-mon, + CPU %d: Previous MCE still in progress, raising +triple fault\n, + cenv-cpu_index); qemu_log_mask(CPU_LOG_RESET, Triple fault\n); qemu_system_reset_request(); return; } if (banks[1] MCI_STATUS_VAL) { -status |= MCI_STATUS_OVER; +params-status |= MCI_STATUS_OVER; } -banks[2] = addr; -banks[3] = misc; -cenv-mcg_status = mcg_status; -banks[1] = status; +banks[2] = params-addr; +banks[3] = params-misc; +cenv-mcg_status = params-mcg_status; +banks[1] = params-status; cpu_interrupt(cenv, CPU_INTERRUPT_MCE); } else if (!(banks[1] MCI_STATUS_VAL) || !(banks[1] MCI_STATUS_UC)) { if (banks[1] MCI_STATUS_VAL) { -status |= MCI_STATUS_OVER; +params-status |= MCI_STATUS_OVER; } -banks[2] = addr; -banks[3] = misc; -banks[1] = status; +banks[2] = params-addr; +banks[3] = params-misc; +banks[1] = params-status; } 
else { banks[1] |= MCI_STATUS_OVER; } @@ -1138,6 +1155,16 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags) { +MCEInjectionParams params = { +.mon = mon, +.env = cenv, +.bank = bank, +.status = status, +.mcg_status = mcg_status, +.addr = addr, +.misc = misc, +.flags = flags, +}; unsigned bank_num = cenv-mcg_cap 0xff; CPUState *env; int flag = 0; @@ -1167,17 +1194,19 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState
[PATCH v2 09/15] kvm: x86: Inject pending MCE events on state writeback
The current way of injecting MCE events without updating of and synchronizing with the CPUState is broken and causes spurious corruptions of the MCE-related parts of the CPUState. As a first step towards a fix, enhance the state writeback code with support for injecting events that are pending in the CPUState. A pending exception will then be signaled via cpu_interrupt(CPU_INTERRUPT_MCE). And, just like for TCG, we need to leave the halt state when CPU_INTERRUPT_MCE is pending (left broken for the to-be-removed old KVM code). This will also allow to unify TCG and KVM injection code. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/kvm.c | 60 + 1 files changed, 60 insertions(+), 0 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index a416554..939edc8 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -467,6 +467,38 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, #endif /* !KVM_CAP_MCE*/ } +static int kvm_inject_mce_oldstyle(CPUState *env) +{ +#ifdef KVM_CAP_MCE +if (!kvm_has_vcpu_events() env-exception_injected == EXCP12_MCHK) { +unsigned int bank, bank_num = env-mcg_cap 0xff; +struct kvm_x86_mce mce; + +env-exception_injected = -1; + +/* + * There must be at least one bank in use if an MCE is pending. + * Find it and use its values for the event injection. 
+ */ +for (bank = 0; bank bank_num; bank++) { +if (env-mce_banks[bank * 4 + 1] MCI_STATUS_VAL) { +break; +} +} +assert(bank bank_num); + +mce.bank = bank; +mce.status = env-mce_banks[bank * 4 + 1]; +mce.mcg_status = env-mcg_status; +mce.addr = env-mce_banks[bank * 4 + 2]; +mce.misc = env-mce_banks[bank * 4 + 3]; + +return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, mce); +} +#endif /* KVM_CAP_MCE */ +return 0; +} + static void cpu_update_state(void *opaque, int running, int reason) { CPUState *env = opaque; @@ -1539,6 +1571,11 @@ int kvm_arch_put_registers(CPUState *env, int level) if (ret 0) { return ret; } +/* must be before kvm_put_msrs */ +ret = kvm_inject_mce_oldstyle(env); +if (ret 0) { +return ret; +} ret = kvm_put_msrs(env, level); if (ret 0) { return ret; @@ -1677,6 +1714,29 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run) int kvm_arch_process_async_events(CPUState *env) { +if (env-interrupt_request CPU_INTERRUPT_MCE) { +/* We must not raise CPU_INTERRUPT_MCE if it's not supported. */ +assert(env-mcg_cap); + +env-interrupt_request = ~CPU_INTERRUPT_MCE; + +kvm_cpu_synchronize_state(env); + +if (env-exception_injected == EXCP08_DBLE) { +/* this means triple fault */ +qemu_system_reset_request(); +env-exit_request = 1; +return 0; +} +env-exception_injected = EXCP12_MCHK; +env-has_error_code = 0; + +env-halted = 0; +if (kvm_irqchip_in_kernel() env-mp_state == KVM_MP_STATE_HALTED) { +env-mp_state = KVM_MP_STATE_RUNNABLE; +} +} + if (kvm_irqchip_in_kernel()) { return 0; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 12/15] kvm: x86: Clean up kvm_setup_mce
There is nothing to abstract here. Fold kvm_setup_mce into its caller and fix up the error reporting (return code of kvm_vcpu_ioctl holds the error value). Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/kvm.c | 11 --- 1 files changed, 4 insertions(+), 7 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index be896dd..486efb9 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -187,11 +187,6 @@ static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, return -ENOSYS; } -static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap) -{ -return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); -} - static void kvm_mce_inject(CPUState *env, target_phys_addr_t paddr, int code) { uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | @@ -440,6 +435,7 @@ int kvm_arch_init_vcpu(CPUState *env) kvm_check_extension(env-kvm_state, KVM_CAP_MCE) 0) { uint64_t mcg_cap; int banks; +int ret; if (kvm_get_mce_cap_supported(env-kvm_state, mcg_cap, banks)) { perror(kvm_get_mce_cap_supported FAILED); @@ -448,8 +444,9 @@ int kvm_arch_init_vcpu(CPUState *env) banks = MCE_BANKS_DEF; mcg_cap = MCE_CAP_DEF; mcg_cap |= banks; -if (kvm_setup_mce(env, mcg_cap)) { -perror(kvm_setup_mce FAILED); +ret = kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); +if (ret 0) { +fprintf(stderr, KVM_X86_SETUP_MCE: %s, strerror(-ret)); } else { env-mcg_cap = mcg_cap; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 05/15] x86: Optionally avoid injecting AO MCEs while others are pending
Allow to tell cpu_x86_inject_mce that it should ignore Action Optional MCE events when the target VCPU is still processing another one. This will be used by KVM soon. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- monitor.c|7 +-- target-i386/cpu.h|5 - target-i386/helper.c | 26 +++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/monitor.c b/monitor.c index 662df7c..ae20927 100644 --- a/monitor.c +++ b/monitor.c @@ -2709,12 +2709,15 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) uint64_t mcg_status = qdict_get_int(qdict, mcg_status); uint64_t addr = qdict_get_int(qdict, addr); uint64_t misc = qdict_get_int(qdict, misc); -int broadcast = qdict_get_try_bool(qdict, broadcast, 0); +int flags = MCE_INJECT_UNCOND_AO; +if (qdict_get_try_bool(qdict, broadcast, 0)) { +flags |= MCE_INJECT_BROADCAST; +} for (cenv = first_cpu; cenv != NULL; cenv = cenv-next_cpu) { if (cenv-cpu_index == cpu_index) { cpu_x86_inject_mce(mon, cenv, bank, status, mcg_status, addr, misc, - broadcast); + flags); break; } } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 486af1d..d0eae75 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -987,8 +987,11 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, void do_cpu_init(CPUState *env); void do_cpu_sipi(CPUState *env); +#define MCE_INJECT_BROADCAST1 +#define MCE_INJECT_UNCOND_AO2 + void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, -uint64_t misc, int broadcast); +uint64_t misc, int flags); #endif /* CPU_I386_H */ diff --git a/target-i386/helper.c b/target-i386/helper.c index 462d332..e3ef40c 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1069,11 +1069,20 @@ static void breakpoint_handler(CPUState *env) static void qemu_inject_x86_mce(Monitor *mon, CPUState 
*cenv, int bank, uint64_t status, -uint64_t mcg_status, uint64_t addr, uint64_t misc) +uint64_t mcg_status, uint64_t addr, uint64_t misc, +int flags) { uint64_t mcg_cap = cenv-mcg_cap; uint64_t *banks = cenv-mce_banks + 4 * bank; +/* + * If there is an MCE exception being processed, ignore this SRAO MCE + * unless unconditional injection was requested. + */ +if (!(flags MCE_INJECT_UNCOND_AO) !(status MCI_STATUS_AR) + (cenv-mcg_status MCG_STATUS_MCIP)) { +return; +} if (status MCI_STATUS_UC) { /* * if MSR_MCG_CTL is not all 1s, the uncorrected error @@ -1127,7 +1136,7 @@ qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, -uint64_t misc, int broadcast) +uint64_t misc, int flags) { unsigned bank_num = cenv-mcg_cap 0xff; CPUState *env; @@ -1145,27 +1154,30 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, monitor_printf(mon, Invalid MCE status code\n); return; } -if (broadcast !cpu_x86_support_mca_broadcast(cenv)) { +if ((flags MCE_INJECT_BROADCAST) + !cpu_x86_support_mca_broadcast(cenv)) { monitor_printf(mon, Guest CPU does not support MCA broadcast\n); return; } if (kvm_enabled()) { -if (broadcast) { +if (flags MCE_INJECT_BROADCAST) { flag |= MCE_BROADCAST; } kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); } else { -qemu_inject_x86_mce(mon, cenv, bank, status, mcg_status, addr, misc); -if (broadcast) { +qemu_inject_x86_mce(mon, cenv, bank, status, mcg_status, addr, misc, +flags); +if (flags MCE_INJECT_BROADCAST) { for (env = first_cpu; env != NULL; env = env-next_cpu) { if (cenv == env) { continue; } qemu_inject_x86_mce(mon, env, 1, MCI_STATUS_VAL | MCI_STATUS_UC, -MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0); +MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, +flags); } } } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org 
More majordomo
[PATCH v2 01/15] x86: Account for MCE in cpu_has_work
MCEs can be injected asynchronously, so they can also terminate the halt state. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/exec.h | 15 ++- 1 files changed, 6 insertions(+), 9 deletions(-) diff --git a/target-i386/exec.h b/target-i386/exec.h index fc8945b..d050dd0 100644 --- a/target-i386/exec.h +++ b/target-i386/exec.h @@ -293,15 +293,12 @@ static inline void load_eflags(int eflags, int update_mask) static inline int cpu_has_work(CPUState *env) { -int work; - -work = (env-interrupt_request CPU_INTERRUPT_HARD) - (env-eflags IF_MASK); -work |= env-interrupt_request CPU_INTERRUPT_NMI; -work |= env-interrupt_request CPU_INTERRUPT_INIT; -work |= env-interrupt_request CPU_INTERRUPT_SIPI; - -return work; +return ((env-interrupt_request CPU_INTERRUPT_HARD) +(env-eflags IF_MASK)) || + (env-interrupt_request (CPU_INTERRUPT_NMI | + CPU_INTERRUPT_INIT | + CPU_INTERRUPT_SIPI | + CPU_INTERRUPT_MCE)); } static inline int cpu_halted(CPUState *env) { -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 00/15] [uq/master] Patch queue, part IV (MCE edition)
Round 2 of this part, primarily addressing review comments: - Reworked CPU_INTERRUPT_MCE -> exception translation (now done in kvm_arch_process_async_events, indeed much cleaner) - Add missing cpu_synchronize_state on pending MCE events for !kvm_irqchip_in_kernel - Split up KVM MCE code switch from old to new style into two patches and dropped some unneeded variable renamings - Fixed Windows build (qemu_ram_remap is POSIX-only) Thanks for the feedback so far. CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Huang Ying ying.hu...@intel.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com Huang Ying (2): Add qemu_ram_remap KVM, MCE, unpoison memory address across reboot Jan Kiszka (13): x86: Account for MCE in cpu_has_work x86: Perform implicit mcg_status reset x86: Small cleanups of MCE helpers x86: Refine error reporting of MCE injection services x86: Optionally avoid injecting AO MCEs while others are pending Synchronize VCPU states before reset kvm: x86: Move MCE functions together kvm: Rename kvm_arch_process_irqchip_events to async_events kvm: x86: Inject pending MCE events on state writeback x86: Run qemu_inject_x86_mce on target VCPU kvm: x86: Consolidate TCG and KVM MCE injection code kvm: x86: Clean up kvm_setup_mce kvm: x86: Fail kvm_arch_init_vcpu if MCE initialization fails cpu-all.h |8 +- cpu-common.h |1 + exec.c| 63 +++- kvm-all.c |2 +- kvm.h |2 +- monitor.c | 11 +- qemu-common.h |6 +- target-i386/cpu.h | 11 +- target-i386/exec.h| 15 +- target-i386/helper.c | 185 +--- target-i386/kvm.c | 463 - target-i386/kvm_x86.h | 25 --- target-ppc/kvm.c |2 +- target-s390x/kvm.c|2 +- vl.c |1 + 15 files changed, 399 insertions(+), 398 deletions(-) delete mode 100644 target-i386/kvm_x86.h -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 04/15] x86: Refine error reporting of MCE injection services
As this service is used by the human monitor, make sure that errors get reported to the right channel, and also raise the verbosity. This requires to move Monitor typedef in qemu-common.h to resolve the include dependency. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- monitor.c|4 +- qemu-common.h|6 ++-- target-i386/cpu.h|6 ++-- target-i386/helper.c | 79 +- 4 files changed, 54 insertions(+), 41 deletions(-) diff --git a/monitor.c b/monitor.c index 45b0cc2..662df7c 100644 --- a/monitor.c +++ b/monitor.c @@ -2712,8 +2712,8 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) int broadcast = qdict_get_try_bool(qdict, broadcast, 0); for (cenv = first_cpu; cenv != NULL; cenv = cenv-next_cpu) { -if (cenv-cpu_index == cpu_index cenv-mcg_cap) { -cpu_x86_inject_mce(cenv, bank, status, mcg_status, addr, misc, +if (cenv-cpu_index == cpu_index) { +cpu_x86_inject_mce(mon, cenv, bank, status, mcg_status, addr, misc, broadcast); break; } diff --git a/qemu-common.h b/qemu-common.h index a4d9c21..6ac29cc 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -18,6 +18,9 @@ typedef struct QEMUFile QEMUFile; typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; +struct Monitor; +typedef struct Monitor Monitor; + /* we put basic includes here to avoid repeating them in device drivers */ #include stdlib.h #include stdio.h @@ -324,9 +327,6 @@ void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); -struct Monitor; -typedef struct Monitor Monitor; - /* Convert a byte between binary and BCD. 
*/ static inline uint8_t to_bcd(uint8_t val) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 52bb48e..486af1d 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -987,8 +987,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, void do_cpu_init(CPUState *env); void do_cpu_sipi(CPUState *env); -void cpu_x86_inject_mce(CPUState *cenv, int bank, uint64_t status, -uint64_t mcg_status, uint64_t addr, uint64_t misc, -int broadcast); +void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, +uint64_t status, uint64_t mcg_status, uint64_t addr, +uint64_t misc, int broadcast); #endif /* CPU_I386_H */ diff --git a/target-i386/helper.c b/target-i386/helper.c index ba3bed9..462d332 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -30,6 +30,7 @@ #include kvm_x86.h #ifndef CONFIG_USER_ONLY #include sysemu.h +#include monitor.h #endif //#define DEBUG_MMU @@ -1067,33 +1068,38 @@ static void breakpoint_handler(CPUState *env) } static void -qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc) { uint64_t mcg_cap = cenv-mcg_cap; -uint64_t *banks = cenv-mce_banks; - -/* - * if MSR_MCG_CTL is not all 1s, the uncorrected error - * reporting is disabled - */ -if ((status MCI_STATUS_UC) (mcg_cap MCG_CTL_P) -cenv-mcg_ctl != ~(uint64_t)0) { -return; -} -banks += 4 * bank; -/* - * if MSR_MCi_CTL is not all 1s, the uncorrected error - * reporting is disabled for the bank - */ -if ((status MCI_STATUS_UC) banks[0] != ~(uint64_t)0) { -return; -} +uint64_t *banks = cenv-mce_banks + 4 * bank; + if (status MCI_STATUS_UC) { +/* + * if MSR_MCG_CTL is not all 1s, the uncorrected error + * reporting is disabled + */ +if ((mcg_cap MCG_CTL_P) cenv-mcg_ctl != ~(uint64_t)0) { +monitor_printf(mon, + CPU %d: Uncorrected error reporting disabled\n, + cenv-cpu_index); +return; +} + +/* + * if MSR_MCi_CTL is not 
all 1s, the uncorrected error + * reporting is disabled for the bank + */ +if (banks[0] != ~(uint64_t)0) { +monitor_printf(mon, CPU %d: Uncorrected error reporting disabled + for bank %d\n, cenv-cpu_index, bank); +return; +} + if ((cenv-mcg_status MCG_STATUS_MCIP) || !(cenv-cr[4] CR4_MCE_MASK)) { -fprintf(stderr, injects mce exception while previous -one is in progress!\n); +monitor_printf(mon, CPU %d: Previous MCE still in progress, +
[PATCH v2 02/15] x86: Perform implicit mcg_status reset
Reorder mcg_status in CPUState to achieve automatic clearing on reset. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/cpu.h|3 ++- target-i386/helper.c |2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 5f1df8b..75156e7 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -687,6 +687,8 @@ typedef struct CPUX86State { uint64_t pat; +uint64_t mcg_status; + /* exception/interrupt handling */ int error_code; int exception_is_int; @@ -741,7 +743,6 @@ typedef struct CPUX86State { struct DeviceState *apic_state; uint64_t mcg_cap; -uint64_t mcg_status; uint64_t mcg_ctl; uint64_t mce_banks[MCE_BANKS_DEF*4]; diff --git a/target-i386/helper.c b/target-i386/helper.c index f0c546d..f41416f 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -101,8 +101,6 @@ void cpu_reset(CPUX86State *env) env-dr[7] = DR7_FIXED_1; cpu_breakpoint_remove_all(env, BP_CPU); cpu_watchpoint_remove_all(env, BP_CPU); - -env-mcg_status = 0; } void cpu_x86_close(CPUX86State *env) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 03/15] x86: Small cleanups of MCE helpers
Fix some code style issues, use proper headers, and align to cpu_x86 naming scheme. No functional changes. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- cpu-all.h|4 monitor.c|2 +- target-i386/cpu.h|5 + target-i386/helper.c | 41 - 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 87b0f86..caf5e6c 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -971,8 +971,4 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); int cpu_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf, int len, int is_write); -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, -uint64_t mcg_status, uint64_t addr, uint64_t misc, -int broadcast); - #endif /* CPU_ALL_H */ diff --git a/monitor.c b/monitor.c index 22ae3bb..45b0cc2 100644 --- a/monitor.c +++ b/monitor.c @@ -2713,7 +2713,7 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) for (cenv = first_cpu; cenv != NULL; cenv = cenv-next_cpu) { if (cenv-cpu_index == cpu_index cenv-mcg_cap) { -cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, +cpu_x86_inject_mce(cenv, bank, status, mcg_status, addr, misc, broadcast); break; } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 75156e7..52bb48e 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -986,4 +986,9 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, void do_cpu_init(CPUState *env); void do_cpu_sipi(CPUState *env); + +void cpu_x86_inject_mce(CPUState *cenv, int bank, uint64_t status, +uint64_t mcg_status, uint64_t addr, uint64_t misc, +int broadcast); + #endif /* CPU_I386_H */ diff --git a/target-i386/helper.c b/target-i386/helper.c index f41416f..ba3bed9 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -28,6 +28,9 @@ #include qemu-common.h #include kvm.h #include kvm_x86.h +#ifndef CONFIG_USER_ONLY +#include 
sysemu.h +#endif //#define DEBUG_MMU @@ -1063,11 +1066,9 @@ static void breakpoint_handler(CPUState *env) prev_debug_excp_handler(env); } -/* This should come from sysemu.h - if we could include it here... */ -void qemu_system_reset_request(void); - -static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, -uint64_t mcg_status, uint64_t addr, uint64_t misc) +static void +qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +uint64_t mcg_status, uint64_t addr, uint64_t misc) { uint64_t mcg_cap = cenv-mcg_cap; uint64_t *banks = cenv-mce_banks; @@ -1077,15 +1078,17 @@ static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, * reporting is disabled */ if ((status MCI_STATUS_UC) (mcg_cap MCG_CTL_P) -cenv-mcg_ctl != ~(uint64_t)0) +cenv-mcg_ctl != ~(uint64_t)0) { return; +} banks += 4 * bank; /* * if MSR_MCi_CTL is not all 1s, the uncorrected error * reporting is disabled for the bank */ -if ((status MCI_STATUS_UC) banks[0] != ~(uint64_t)0) +if ((status MCI_STATUS_UC) banks[0] != ~(uint64_t)0) { return; +} if (status MCI_STATUS_UC) { if ((cenv-mcg_status MCG_STATUS_MCIP) || !(cenv-cr[4] CR4_MCE_MASK)) { @@ -1095,8 +1098,9 @@ static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, qemu_system_reset_request(); return; } -if (banks[1] MCI_STATUS_VAL) +if (banks[1] MCI_STATUS_VAL) { status |= MCI_STATUS_OVER; +} banks[2] = addr; banks[3] = misc; cenv-mcg_status = mcg_status; @@ -1104,16 +1108,18 @@ static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, cpu_interrupt(cenv, CPU_INTERRUPT_MCE); } else if (!(banks[1] MCI_STATUS_VAL) || !(banks[1] MCI_STATUS_UC)) { -if (banks[1] MCI_STATUS_VAL) +if (banks[1] MCI_STATUS_VAL) { status |= MCI_STATUS_OVER; +} banks[2] = addr; banks[3] = misc; banks[1] = status; -} else +} else { banks[1] |= MCI_STATUS_OVER; +} } -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +void cpu_x86_inject_mce(CPUState *cenv, int bank, uint64_t status, 
uint64_t mcg_status, uint64_t addr, uint64_t misc, int broadcast) { @@ -1155,15 +1161,16 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, static void mce_init(CPUX86State
[PATCH v2 07/15] kvm: x86: Move MCE functions together
Pure function suffling to avoid multiple #ifdef KVM_CAP_MCE sections, no functional changes. While at it, annotate some #ifdef sections. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/kvm.c | 346 ++--- 1 files changed, 171 insertions(+), 175 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 0aa0a41..f909661 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -172,7 +172,7 @@ static int get_para_features(CPUState *env) #endif return features; } -#endif +#endif /* CONFIG_KVM_PARA */ #ifdef KVM_CAP_MCE static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, @@ -273,8 +273,174 @@ static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, run_on_cpu(env, kvm_do_inject_x86_mce, data); } -static void kvm_mce_broadcast_rest(CPUState *env); -#endif +static void kvm_mce_broadcast_rest(CPUState *env) +{ +struct kvm_x86_mce mce = { +.bank = 1, +.status = MCI_STATUS_VAL | MCI_STATUS_UC, +.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, +.addr = 0, +.misc = 0, +}; +CPUState *cenv; + +/* Broadcast MCA signal for processor version 06H_EH and above */ +if (cpu_x86_support_mca_broadcast(env)) { +for (cenv = first_cpu; cenv != NULL; cenv = cenv-next_cpu) { +if (cenv == env) { +continue; +} +kvm_inject_x86_mce_on(cenv, mce, ABORT_ON_ERROR); +} +} +} + +static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr) +{ +struct kvm_x86_mce mce = { +.bank = 9, +.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | MCI_STATUS_AR | 0x134, +.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV, +.addr = paddr, +.misc = (MCM_ADDR_PHYS 6) | 0xc, +}; +int r; + +r = kvm_set_mce(env, mce); +if (r 0) { +fprintf(stderr, kvm_set_mce: %s\n, strerror(errno)); +abort(); +} +kvm_mce_broadcast_rest(env); +} + +static void 
kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr) +{ +struct kvm_x86_mce mce = { +.bank = 9, +.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, +.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, +.addr = paddr, +.misc = (MCM_ADDR_PHYS 6) | 0xc, +}; +int r; + +r = kvm_set_mce(env, mce); +if (r 0) { +fprintf(stderr, kvm_set_mce: %s\n, strerror(errno)); +abort(); +} +kvm_mce_broadcast_rest(env); +} + +static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr) +{ +struct kvm_x86_mce mce = { +.bank = 9, +.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0, +.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, +.addr = paddr, +.misc = (MCM_ADDR_PHYS 6) | 0xc, +}; + +kvm_inject_x86_mce_on(env, mce, ABORT_ON_ERROR); +kvm_mce_broadcast_rest(env); +} +#endif /* KVM_CAP_MCE */ + +static void hardware_memory_error(void) +{ +fprintf(stderr, Hardware memory error!\n); +exit(1); +} + +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ +#ifdef KVM_CAP_MCE +void *vaddr; +ram_addr_t ram_addr; +target_phys_addr_t paddr; + +if ((env-mcg_cap MCG_SER_P) addr + (code == BUS_MCEERR_AR +|| code == BUS_MCEERR_AO)) { +vaddr = (void *)addr; +if (qemu_ram_addr_from_host(vaddr, ram_addr) || +!kvm_physical_memory_addr_from_ram(env-kvm_state, ram_addr, paddr)) { +fprintf(stderr, Hardware memory error for memory used by +QEMU itself instead of guest system!\n); +/* Hope we are lucky for AO MCE */ +if (code == BUS_MCEERR_AO) { +return 0; +} else { +hardware_memory_error(); +} +} + +if (code == BUS_MCEERR_AR) { +/* Fake an Intel architectural Data Load SRAR UCR */ +kvm_mce_inj_srar_dataload(env, paddr); +} else { +/* + * If there is an MCE excpetion being processed, ignore + * this SRAO MCE + */ +if (!kvm_mce_in_progress(env)) { +/* Fake an Intel architectural Memory scrubbing UCR */ 
+kvm_mce_inj_srao_memscrub(env, paddr); +} +} +} else +#endif /* KVM_CAP_MCE */ +{ +if (code == BUS_MCEERR_AO) { +return 0; +
[PATCH v2 11/15] kvm: x86: Consolidate TCG and KVM MCE injection code
This switches KVM's MCE injection path to cpu_x86_inject_mce, both for SIGBUS and monitor initiated events. This means we prepare the MCA MSRs in the VCPUState also for KVM. We have to drop the MSRs writeback restrictions for this purpose which is now safe as every uncoordinated MSR injection is removed with this patch. Signed-off-by: Jan Kiszka jan.kis...@siemens.com CC: Huang Ying ying.hu...@intel.com CC: Hidetoshi Seto seto.hideto...@jp.fujitsu.com CC: Jin Dongming jin.dongm...@np.css.fujitsu.com --- target-i386/helper.c | 34 +++- target-i386/kvm.c | 238 +--- target-i386/kvm_x86.h | 25 - 3 files changed, 37 insertions(+), 260 deletions(-) delete mode 100644 target-i386/kvm_x86.h diff --git a/target-i386/helper.c b/target-i386/helper.c index a32960c..a08309f 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -27,7 +27,6 @@ #include exec-all.h #include qemu-common.h #include kvm.h -#include kvm_x86.h #ifndef CONFIG_USER_ONLY #include sysemu.h #include monitor.h @@ -1167,7 +1166,6 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, }; unsigned bank_num = cenv-mcg_cap 0xff; CPUState *env; -int flag = 0; if (!cenv-mcg_cap) { monitor_printf(mon, MCE injection not supported\n); @@ -1187,27 +1185,19 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, return; } -if (kvm_enabled()) { -if (flags MCE_INJECT_BROADCAST) { -flag |= MCE_BROADCAST; -} - -kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); -} else { -run_on_cpu(cenv, do_inject_x86_mce, params); -if (flags MCE_INJECT_BROADCAST) { -params.bank = 1; -params.status = MCI_STATUS_VAL | MCI_STATUS_UC; -params.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; -params.addr = 0; -params.misc = 0; -for (env = first_cpu; env != NULL; env = env-next_cpu) { -if (cenv == env) { -continue; -} -params.env = env; -run_on_cpu(cenv, do_inject_x86_mce, params); +run_on_cpu(cenv, do_inject_x86_mce, params); +if (flags MCE_INJECT_BROADCAST) { +params.bank = 1; +params.status = 
MCI_STATUS_VAL | MCI_STATUS_UC; +params.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; +params.addr = 0; +params.misc = 0; +for (env = first_cpu; env != NULL; env = env-next_cpu) { +if (cenv == env) { +continue; } +params.env = env; +run_on_cpu(cenv, do_inject_x86_mce, params); } } } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 939edc8..be896dd 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -28,7 +28,6 @@ #include hw/pc.h #include hw/apic.h #include ioport.h -#include kvm_x86.h #ifdef CONFIG_KVM_PARA #include linux/kvm_para.h @@ -193,164 +192,23 @@ static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap) return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); } -static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m) +static void kvm_mce_inject(CPUState *env, target_phys_addr_t paddr, int code) { -return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m); -} - -static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) -{ -struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs); -int r; - -kmsrs-nmsrs = n; -memcpy(kmsrs-entries, msrs, n * sizeof *msrs); -r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs); -memcpy(msrs, kmsrs-entries, n * sizeof *msrs); -free(kmsrs); -return r; -} - -/* FIXME: kill this and kvm_get_msr, use env-mcg_status instead */ -static int kvm_mce_in_progress(CPUState *env) -{ -struct kvm_msr_entry msr_mcg_status = { -.index = MSR_MCG_STATUS, -}; -int r; - -r = kvm_get_msr(env, msr_mcg_status, 1); -if (r == -1 || r == 0) { -fprintf(stderr, Failed to get MCE status\n); -return 0; -} -return !!(msr_mcg_status.data MCG_STATUS_MCIP); -} - -struct kvm_x86_mce_data -{ -CPUState *env; -struct kvm_x86_mce *mce; -int abort_on_error; -}; - -static void kvm_do_inject_x86_mce(void *_data) -{ -struct kvm_x86_mce_data *data = _data; -int r; - -/* If there is an MCE exception being processed, ignore this SRAO MCE */ -if ((data-env-mcg_cap MCG_SER_P) -!(data-mce-status MCI_STATUS_AR)) { -if 
(kvm_mce_in_progress(data-env)) { -return; -} -} - -r = kvm_set_mce(data-env, data-mce); -if (r 0) { -perror(kvm_set_mce FAILED); -if (data-abort_on_error) { -abort(); -} -} -} - -static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, - int flag) -{ -struct
[Bug 29382] New: Panic: Unable to handle kernel paging request
https://bugzilla.kernel.org/show_bug.cgi?id=29382 Summary: Panic: Unable to handle kernel paging request Product: Virtualization Version: unspecified Platform: All OS/Version: Linux Tree: Mainline Status: NEW Severity: high Priority: P1 Component: kvm AssignedTo: virtualization_...@kernel-bugs.osdl.org ReportedBy: se...@seblu.net Regression: No Created an attachment (id=48302) -- (https://bugzilla.kernel.org/attachment.cgi?id=48302) screen dump Hello, I have a KVM host whose kernel panics frequently. Extract from lshw: product: PowerEdge M610 vendor: Dell Inc. version: PowerEdge M1000e product: Intel(R) Xeon(R) CPU X5650 @ 2.67GHz # uname -a Linux hkvm-cap-2 2.6.35.7 #1 SMP Mon Oct 25 15:28:33 UTC 2010 x86_64 GNU/Linux # kvm --version QEMU emulator version 0.13.0 (qemu-kvm-0.13.0), Copyright (c) 2003-2008 Fabrice Bellard # dpkg -l|grep -e libvirt ii libvirt-bin 0.8.7-1+sj1 the programs for the libvirt library ii libvirt0 0.8.7-1+sj1 library for interfacing with different virtualization systems ii python-libvirt 0.8.7-1+sj1 libvirt Python bindings I've taken some screenshots from iDRAC of the kernel panic trace. Do you have any recommendations about this? Regards, -- Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email --- You are receiving this mail because: --- You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Bug 29382] Panic: Unable to handle kernel paging request
https://bugzilla.kernel.org/show_bug.cgi?id=29382 --- Comment #1 from Seb Lu se...@seblu.net 2011-02-18 14:59:41 --- Created an attachment (id=48312) -- (https://bugzilla.kernel.org/attachment.cgi?id=48312) screen dump -- Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email --- You are receiving this mail because: --- You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Bug 29382] Panic: Unable to handle kernel paging request
https://bugzilla.kernel.org/show_bug.cgi?id=29382 --- Comment #2 from Seb Lu se...@seblu.net 2011-02-18 15:00:10 --- Created an attachment (id=48322) -- (https://bugzilla.kernel.org/attachment.cgi?id=48322) screen dump -- Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email --- You are receiving this mail because: --- You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Bug 29382] Panic: Unable to handle kernel paging request
https://bugzilla.kernel.org/show_bug.cgi?id=29382 --- Comment #3 from Seb Lu se...@seblu.net 2011-02-18 15:00:46 --- Created an attachment (id=48332) -- (https://bugzilla.kernel.org/attachment.cgi?id=48332) screen dump -- Configure bugmail: https://bugzilla.kernel.org/userprefs.cgi?tab=email --- You are receiving this mail because: --- You are watching the assignee of the bug. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Possible netfilter-related memory corruption in 2.6.37
Am 14.02.2011 17:52, schrieb Patrick McHardy: Am 14.02.2011 17:48, schrieb Eric Dumazet: I am not sure, but I guess nf_reinject() needs a fix too ;) I agree. That one looks uglier though, I guess we'll have to iterate through all hooks to note the previous one. How about this? Unfortunately I don't think we can avoid iterating through all hooks without violating RCU rules. diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 74aebed..834bb07 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -235,6 +235,7 @@ int nf_queue(struct sk_buff *skb, void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry-skb; + struct nf_hook_ops *i, *prev; struct list_head *elem = entry-elem-list; const struct nf_afinfo *afinfo; @@ -244,8 +245,21 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { - elem = elem-prev; - verdict = NF_ACCEPT; + prev = NULL; + list_for_each_entry_rcu(i, nf_hooks[entry-pf][entry-hook], + list) { + if (i-list == elem) + break; + prev = i; + } + + if (prev == NULL || + i-list == nf_hooks[entry-pf][entry-hook]) + verdict = NF_DROP; + else { + elem = prev-list; + verdict = NF_ACCEPT; + } } if (verdict == NF_ACCEPT) {
[PATCH] KVM test: Do not load acpiphp on RHEL 6.0
It turns out that hotplug for PCI devices is built into the RHEL 6.0 kernel. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests_base.cfg.sample |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 80362db..7eb3635 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -1691,6 +1691,10 @@ variants: - 6.0.i386: no setup +nic_hotplug: +modprobe_module = +block_hotplug: +modprobe_module = image_name = rhel6-32 unattended_install: unattended_file = unattended/RHEL-6-series.ks @@ -1706,6 +1710,10 @@ variants: - 6.0.x86_64: no setup +nic_hotplug: +modprobe_module = +block_hotplug: +modprobe_module = image_name = rhel6-64 unattended_install: unattended_file = unattended/RHEL-6-series.ks -- 1.7.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM test: nic_hotplug: Fix typo
Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/nic_hotplug.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/tests/nic_hotplug.py b/client/tests/kvm/tests/nic_hotplug.py index 50a3ce9..5a757e7 100644 --- a/client/tests/kvm/tests/nic_hotplug.py +++ b/client/tests/kvm/tests/nic_hotplug.py @@ -76,7 +76,7 @@ def run_nic_hotplug(test, params, env): vm.monitor.cmd(device_add_cmd) qdev = vm.monitor.info(qtree) -if id not in qdev: +if nic_id not in qdev: logging.error(qdev) raise error.TestFail(Device %s was not plugged into qdev tree % nic_id) -- 1.7.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Remove ethtool from rtl8139 variant
As the original patch set doesn't define supported_features for it. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/ethtool.py |6 +- client/tests/kvm/tests_base.cfg.sample |1 + 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/tests/ethtool.py b/client/tests/kvm/tests/ethtool.py index 81e45d3..d7c6b57 100644 --- a/client/tests/kvm/tests/ethtool.py +++ b/client/tests/kvm/tests/ethtool.py @@ -191,7 +191,11 @@ def run_ethtool(test, params, env): filename = /tmp/ethtool.dd guest_ip = vm.get_address() ethname = kvm_test_utils.get_linux_ifname(session, vm.get_mac_address(0)) -supported_features = params.get(supported_features).split() +supported_features = params.get(supported_features) +if supported_features: +supported_features = supported_features.split() +else: +supported_features = [] test_matrix = { # type:(callback,(dependence), (exclude) tx: (tx_callback, (), ()), diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 7eb3635..816a94f 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -903,6 +903,7 @@ variants: variants: - @rtl8139: nic_model = rtl8139 +no ethtool jumbo: mtu = 1500 - e1000: -- 1.7.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Possible netfilter-related memory corruption in 2.6.37
Le vendredi 18 février 2011 à 19:37 +0100, Patrick McHardy a écrit : Am 14.02.2011 17:52, schrieb Patrick McHardy: Am 14.02.2011 17:48, schrieb Eric Dumazet: I am not sure, but I guess nf_reinject() needs a fix too ;) I agree. That one looks uglier though, I guess we'll have to iterate through all hooks to note the previous one. How about this? Unfortunately I don't think we can avoid iterating through all hooks without violating RCU rules. /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { - elem = elem-prev; - verdict = NF_ACCEPT; + prev = NULL; + list_for_each_entry_rcu(i, nf_hooks[entry-pf][entry-hook], + list) { + if (i-list == elem) + break; + prev = i; Hmm... what happens if elem was the first elem in list ? We exit with prev = NULL -- NF_DROP ? I must miss something... + } + + if (prev == NULL || + i-list == nf_hooks[entry-pf][entry-hook]) + verdict = NF_DROP; + else { + elem = prev-list; + verdict = NF_ACCEPT; + } } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM test: Include start_vm = yes on nic_bonding
So the VM can be restarted to include the new NICs required for the test. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests_base.cfg.sample |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 816a94f..cfe343d 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -703,6 +703,7 @@ variants: - nic_bonding: type = nic_bonding nics += ' nic2 nic3 nic4' +start_vm = yes image_snapshot = yes serial_login = yes test_timeout = 1000 -- 1.7.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH 1/2] KVM test: make_qemu_command(): properly deal with get_mac_address() failure
If VM params define a new NIC that didn't previously exist, then when make_qemu_command() is called in order to see if the VM should be restarted, it attempts to get the MAC address of the new (nonexistent) NIC, and an exception is raised. This exception is expected and should be caught. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_vm.py |5 - 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 969558b..d852784 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -638,7 +638,10 @@ class VM: except IndexError: netdev_id = None # Handle the '-net nic' part -mac = vm.get_mac_address(vlan) +try: +mac = vm.get_mac_address(vlan) +except VMAddressError: +mac = None qemu_cmd += add_nic(help, vlan, nic_params.get(nic_model), mac, netdev_id, nic_params.get(nic_extra_params)) # Handle the '-net tap' or '-net user' part -- 1.7.3.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH 2/2] KVM test: kvm_vm.py: make 'nic_mac' trigger a VM restart when changed
get_mac_address() should first check if 'nic_mac' is defined and then check the address pool. This way, if 'nic_mac' is changed between tests, make_qemu_command(), which calls get_mac_address(), will reveal the change and trigger a VM restart. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_vm.py | 16 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index d852784..1ceef7a 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -850,15 +850,12 @@ class VM: for vlan in range(num_nics): nic_name = params.objects(nics)[vlan] nic_params = params.object_params(nic_name) -if nic_params.get(nic_mac, None): -mac = nic_params.get(nic_mac) +mac = (nic_params.get(nic_mac) or + mac_source and mac_source.get_mac_address(vlan)) +if mac: kvm_utils.set_mac_address(self.instance, vlan, mac) else: -mac = mac_source and mac_source.get_mac_address(vlan) -if mac: -kvm_utils.set_mac_address(self.instance, vlan, mac) -else: -kvm_utils.generate_mac_address(self.instance, vlan) +kvm_utils.generate_mac_address(self.instance, vlan) # Assign a PCI assignable device self.pci_assignable = None @@ -1233,7 +1230,10 @@ class VM: @raise VMMACAddressMissingError: If no MAC address is defined for the requested NIC -mac = kvm_utils.get_mac_address(self.instance, nic_index) +nic_name = self.params.objects(nics)[nic_index] +nic_params = self.params.object_params(nic_name) +mac = (nic_params.get(nic_mac) or + kvm_utils.get_mac_address(self.instance, nic_index)) if not mac: raise VMMACAddressMissingError(nic_index) return mac -- 1.7.3.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] kvm: fix detection of BIOS disabling VMX
From: Avi Kivity [mailto:a...@redhat.com] Sent: Thursday, February 10, 2011 2:02 AM On 02/08/2011 09:45 PM, Joseph Cihula wrote: This patch fixes the logic used to detect whether BIOS has disabled VMX. Applied, thanks. Will you request this be taken by the -stable kernel? Joe -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM test: Include start_vm = yes on nic_bonding
On Fri, 2011-02-18 at 17:34 -0200, Lucas Meneghel Rodrigues wrote: So the VM can be restarted to include the new NICs required for the test. Ignore this patch, please :) Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests_base.cfg.sample |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 816a94f..cfe343d 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -703,6 +703,7 @@ variants: - nic_bonding: type = nic_bonding nics += ' nic2 nic3 nic4' +start_vm = yes image_snapshot = yes serial_login = yes test_timeout = 1000 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Remove ethtool from rtl8139 variant
On Fri, Feb 18, 2011 at 05:14:09PM -0200, Lucas Meneghel Rodrigues wrote: As the original patch set don't define supported_features for it. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com Acked-by: Amos Kong ak...@redhat.com --- client/tests/kvm/tests/ethtool.py |6 +- client/tests/kvm/tests_base.cfg.sample |1 + 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/tests/ethtool.py b/client/tests/kvm/tests/ethtool.py index 81e45d3..d7c6b57 100644 --- a/client/tests/kvm/tests/ethtool.py +++ b/client/tests/kvm/tests/ethtool.py @@ -191,7 +191,11 @@ def run_ethtool(test, params, env): filename = /tmp/ethtool.dd guest_ip = vm.get_address() ethname = kvm_test_utils.get_linux_ifname(session, vm.get_mac_address(0)) -supported_features = params.get(supported_features).split() +supported_features = params.get(supported_features) +if supported_features: +supported_features = supported_features.split() +else: +supported_features = [] test_matrix = { # type:(callback,(dependence), (exclude) tx: (tx_callback, (), ()), diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 7eb3635..816a94f 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -903,6 +903,7 @@ variants: variants: - @rtl8139: nic_model = rtl8139 +no ethtool jumbo: mtu = 1500 - e1000: -- 1.7.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html