Re: [Qemu-devel] [PATCH for-4.2 v10 05/15] virtio-iommu: Add the iommu regions
Hi Peter, On 8/16/19 6:00 AM, Peter Xu wrote: > On Tue, Jul 30, 2019 at 07:21:27PM +0200, Eric Auger wrote: > > [...] > >> static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t >> *config_data) >> { >> VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); >> @@ -266,6 +333,15 @@ static void virtio_iommu_device_realize(DeviceState >> *dev, Error **errp) >> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); >> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS); >> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); >> + >> +memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num)); >> +s->as_by_busptr = g_hash_table_new(NULL, NULL); > > VT-d was using g_hash_table_new_full() so that potentially VTDBus can > still be freed. Here for IOMMUPciBus allocated in > virtio_iommu_find_add_as() I think it'll be leaked if we remove > entries in the hash table? > > So I started to wonder whether PCI/PCIe buses are allowed to be > plugged/unplugged after all because I never tried. With latest > 5.3.0-rc4 guest I gave it a shot and I see the error below. It could > be something that I did wrong or it could be simply that it's not > working at all. Have you tried anything like that? Michael/Alex? I have never tried this on my end. However, looking at docs/pcie_pci_bridge.txt, it seems possible to hotplug a pcie-pci-bridge downstream of a pcie-root-port under specific conditions (see the limitations section). So I guess the situation you describe may happen. I switched to the g_hash_table_new_full() version. 
Thanks Eric > > bin=x86_64-softmmu/qemu-system-x86_64 > $bin -M q35,accel=kvm,kernel-irqchip=on -smp 8 -m 2G -cpu host \ > -monitor telnet::,server,nowait -nographic \ > -device e1000,netdev=net0 \ > -netdev user,id=net0,hostfwd=tcp::-:22 \ > -device pcie-pci-bridge,bus=pcie.0,id=pci.1 \ > -drive file=/images/default.qcow2,if=none,cache=none,id=drive0 \ > -device virtio-blk,drive=drive0 > > (qemu) device_add pci-bridge,bus=pci.1,id=pci.2,chassis_nr=1,addr=1.0 > > [ 66.172352] pci :01:01.0: [1b36:0001] type 01 class 0x060400 > [ 66.176897] pci :01:01.0: reg 0x10: [mem 0x-0x00ff 64bit] > [ 66.186130] pci :01:01.0: No bus number available for hot-added bridge > [ 66.189489] shpchp :00:03.0: BAR 14: assigned [mem > 0x8000-0x800f] > [ 66.193235] pci :01:01.0: BAR 0: assigned [mem 0x8000-0x80ff > 64bit] > [ 66.198587] shpchp :00:03.0: PCI bridge to [bus 01] > [ 66.204113] shpchp :00:03.0: bridge window [mem > 0x8000-0x800f] > [ 66.215212] shpchp :01:01.0: HPC vendor_id 1b36 device_id 1 ss_vid 0 > ss_did 0 > [ 66.218531] shpchp :01:01.0: enabling device ( -> 0002) > [ 66.229204] BUG: kernel NULL pointer dereference, address: 00e2 > [ 66.232124] #PF: supervisor write access in kernel mode > [ 66.234369] #PF: error_code(0x0002) - not-present page > [ 66.236585] PGD 0 P4D 0 > [ 66.237431] Oops: 0002 [#1] SMP PTI > [ 66.238617] CPU: 2 PID: 277 Comm: kworker/2:1 Kdump: loaded Not tainted > 5.3.0-rc4 #85 > [ 66.241200] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS > rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 > [ 66.244916] Workqueue: shpchp-1 shpchp_pushbutton_thread > [ 66.246583] RIP: 0010:shpc_init.cold+0x5c3/0x8a1 > [ 66.248041] Code: 24 90 01 00 00 8b 49 08 40 80 fe 02 0f 85 f4 01 00 00 f7 > c1 00 00 00 f0 0f 84 b2 01 00 00 b9 13 00 00 00 80 3d 33 40 38 02 00 <88> 8a > e26 > [ 66.253771] RSP: 0018:c925bb68 EFLAGS: 00010246 > [ 66.255418] RAX: 00ff RBX: RCX: > > [ 66.257763] RDX: RSI: 826bcd01 RDI: > 826bcd60 > [ 66.260065] RBP: R08: 0001 R09: > > [ 
66.263184] R10: 0005 R11: R12: > 888032425400 > [ 66.265706] R13: c917109c R14: 888033da7000 R15: > 001f > [ 66.268200] FS: () GS:88807fc8() > knlGS: > [ 66.270826] CS: 0010 DS: ES: CR0: 80050033 > [ 66.272731] CR2: 00e2 CR3: 33afc002 CR4: > 00360ee0 > [ 66.275373] DR0: DR1: DR2: > > [ 66.277947] DR3: DR6: fffe0ff0 DR7: > 0400 > [ 66.279965] Call Trace: > [ 66.280627] shpc_probe+0x91/0x32b > [ 66.281644] local_pci_probe+0x42/0x80 > [ 66.282752] pci_device_probe+0x107/0x1a0 > [ 66.283877] really_probe+0xf0/0x380 > [ 66.284862] driver_probe_device+0x59/0xd0 > [ 66.285988] ? driver_allows_async_probing+0x50/0x50 > [ 66.287937] bus_for_each_drv+0x7e/0xc0 > [ 66.289752] __device_attach+0xe1/0x160 > [ 66.292076] pci_bus_add_device+0x4b/0x70 > [ 66.295244] pci_bus_add_devices+0x2c/0x64 > [ 66.297429] shpchp_configure_device+0xc1/0xe0 > [
Re: [Qemu-devel] [PATCH for-4.2 v10 05/15] virtio-iommu: Add the iommu regions
On Tue, Jul 30, 2019 at 07:21:27PM +0200, Eric Auger wrote: [...] > static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) > { > VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); > @@ -266,6 +333,15 @@ static void virtio_iommu_device_realize(DeviceState > *dev, Error **errp) > virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); > virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS); > virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); > + > +memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num)); > +s->as_by_busptr = g_hash_table_new(NULL, NULL); VT-d was using g_hash_table_new_full() so that potentially VTDBus can still be freed. Here, for the IOMMUPciBus allocated in virtio_iommu_find_add_as(), I think it'll be leaked if we remove entries in the hash table? So I started to wonder whether PCI/PCIe buses are allowed to be plugged/unplugged after all, because I never tried. With the latest 5.3.0-rc4 guest I gave it a shot and I see the error below. It could be something that I did wrong or it could be simply that it's not working at all. Have you tried anything like that? Michael/Alex? 
bin=x86_64-softmmu/qemu-system-x86_64 $bin -M q35,accel=kvm,kernel-irqchip=on -smp 8 -m 2G -cpu host \ -monitor telnet::,server,nowait -nographic \ -device e1000,netdev=net0 \ -netdev user,id=net0,hostfwd=tcp::-:22 \ -device pcie-pci-bridge,bus=pcie.0,id=pci.1 \ -drive file=/images/default.qcow2,if=none,cache=none,id=drive0 \ -device virtio-blk,drive=drive0 (qemu) device_add pci-bridge,bus=pci.1,id=pci.2,chassis_nr=1,addr=1.0 [ 66.172352] pci :01:01.0: [1b36:0001] type 01 class 0x060400 [ 66.176897] pci :01:01.0: reg 0x10: [mem 0x-0x00ff 64bit] [ 66.186130] pci :01:01.0: No bus number available for hot-added bridge [ 66.189489] shpchp :00:03.0: BAR 14: assigned [mem 0x8000-0x800f] [ 66.193235] pci :01:01.0: BAR 0: assigned [mem 0x8000-0x80ff 64bit] [ 66.198587] shpchp :00:03.0: PCI bridge to [bus 01] [ 66.204113] shpchp :00:03.0: bridge window [mem 0x8000-0x800f] [ 66.215212] shpchp :01:01.0: HPC vendor_id 1b36 device_id 1 ss_vid 0 ss_did 0 [ 66.218531] shpchp :01:01.0: enabling device ( -> 0002) [ 66.229204] BUG: kernel NULL pointer dereference, address: 00e2 [ 66.232124] #PF: supervisor write access in kernel mode [ 66.234369] #PF: error_code(0x0002) - not-present page [ 66.236585] PGD 0 P4D 0 [ 66.237431] Oops: 0002 [#1] SMP PTI [ 66.238617] CPU: 2 PID: 277 Comm: kworker/2:1 Kdump: loaded Not tainted 5.3.0-rc4 #85 [ 66.241200] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 66.244916] Workqueue: shpchp-1 shpchp_pushbutton_thread [ 66.246583] RIP: 0010:shpc_init.cold+0x5c3/0x8a1 [ 66.248041] Code: 24 90 01 00 00 8b 49 08 40 80 fe 02 0f 85 f4 01 00 00 f7 c1 00 00 00 f0 0f 84 b2 01 00 00 b9 13 00 00 00 80 3d 33 40 38 02 00 <88> 8a e26 [ 66.253771] RSP: 0018:c925bb68 EFLAGS: 00010246 [ 66.255418] RAX: 00ff RBX: RCX: [ 66.257763] RDX: RSI: 826bcd01 RDI: 826bcd60 [ 66.260065] RBP: R08: 0001 R09: [ 66.263184] R10: 0005 R11: R12: 888032425400 [ 66.265706] R13: c917109c R14: 888033da7000 R15: 001f [ 
66.268200] FS: () GS:88807fc8() knlGS: [ 66.270826] CS: 0010 DS: ES: CR0: 80050033 [ 66.272731] CR2: 00e2 CR3: 33afc002 CR4: 00360ee0 [ 66.275373] DR0: DR1: DR2: [ 66.277947] DR3: DR6: fffe0ff0 DR7: 0400 [ 66.279965] Call Trace: [ 66.280627] shpc_probe+0x91/0x32b [ 66.281644] local_pci_probe+0x42/0x80 [ 66.282752] pci_device_probe+0x107/0x1a0 [ 66.283877] really_probe+0xf0/0x380 [ 66.284862] driver_probe_device+0x59/0xd0 [ 66.285988] ? driver_allows_async_probing+0x50/0x50 [ 66.287937] bus_for_each_drv+0x7e/0xc0 [ 66.289752] __device_attach+0xe1/0x160 [ 66.292076] pci_bus_add_device+0x4b/0x70 [ 66.295244] pci_bus_add_devices+0x2c/0x64 [ 66.297429] shpchp_configure_device+0xc1/0xe0 [ 66.299692] board_added+0x117/0x240 [ 66.301589] shpchp_enable_slot+0x121/0x2e0 [ 66.303686] shpchp_pushbutton_thread+0x70/0xa0 [ 66.305941] process_one_work+0x221/0x500 [ 66.308253] worker_thread+0x50/0x3b0 [ 66.310512] kthread+0xfb/0x130 [ 66.312422] ? process_one_work+0x500/0x500 [ 66.314617] ? kthread_park+0x80/0x80 [ 66.316489] ret_from_fork+0x3a/0x50 [ 66.318293] Modules linked in: intel_rapl_msr intel_rapl_common kvm_intel kvm crct10dif_pclmul bochs_drm crc32_pclmul drm_vram_helper ghash_clmulni_intel o [
[Qemu-devel] [PATCH for-4.2 v10 05/15] virtio-iommu: Add the iommu regions
This patch initializes the iommu memory regions so that PCIe endpoint transactions get translated. The translation function is not yet implemented though. Signed-off-by: Eric Auger --- v9 -> v10: - remove pc/virt machine headers - virtio_iommu_find_add_as: mr_index introduced in that patch and name properly freed v6 -> v7: - use primary_bus - rebase on new translate proto featuring iommu_idx v5 -> v6: - include qapi/error.h - fix g_hash_table_lookup key in virtio_iommu_find_add_as v4 -> v5: - use PCI bus handle as a key - use get_primary_pci_bus() callback v3 -> v4: - add trace_virtio_iommu_init_iommu_mr v2 -> v3: - use IOMMUMemoryRegion - iommu mr name built with BDF - rename smmu_get_sid into virtio_iommu_get_sid and use PCI_BUILD_BDF --- hw/virtio/trace-events | 2 + hw/virtio/virtio-iommu.c | 92 include/hw/virtio/virtio-iommu.h | 2 + 3 files changed, 96 insertions(+) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index c7276116e7..b32169d56c 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -65,3 +65,5 @@ virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 +virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d" +virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 658249c81e..1610e2f773 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -22,6 +22,8 @@ #include "qemu-common.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" +#include 
"qapi/error.h" +#include "qemu/error-report.h" #include "trace.h" #include "standard-headers/linux/virtio_ids.h" @@ -33,6 +35,50 @@ /* Max size */ #define VIOMMU_DEFAULT_QUEUE_SIZE 256 +static inline uint16_t virtio_iommu_get_sid(IOMMUDevice *dev) +{ +return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); +} + +static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + int devfn) +{ +VirtIOIOMMU *s = opaque; +IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); +static uint32_t mr_index; +IOMMUDevice *sdev; + +if (!sbus) { +sbus = g_malloc0(sizeof(IOMMUPciBus) + + sizeof(IOMMUDevice *) * IOMMU_PCI_DEVFN_MAX); +sbus->bus = bus; +g_hash_table_insert(s->as_by_busptr, bus, sbus); +} + +sdev = sbus->pbdev[devfn]; +if (!sdev) { +char *name = g_strdup_printf("%s-%d-%d", + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + mr_index++, devfn); +sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice)); + +sdev->viommu = s; +sdev->bus = bus; +sdev->devfn = devfn; + +trace_virtio_iommu_init_iommu_mr(name); + +memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); +address_space_init(&sdev->as, + MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); +g_free(name); +} +return &sdev->as; +} + static int virtio_iommu_attach(VirtIOIOMMU *s, struct virtio_iommu_req_attach *req) { @@ -192,6 +238,27 @@ out: } } +static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, +IOMMUAccessFlags flag, +int iommu_idx) +{ +IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); +uint32_t sid; + +IOMMUTLBEntry entry = { +.target_as = &address_space_memory, +.iova = addr, +.translated_addr = addr, +.addr_mask = ~(hwaddr)0, +.perm = IOMMU_NONE, +}; + +sid = virtio_iommu_get_sid(sdev); + +trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); +return entry; +} + static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); @@ -266,6 +333,15 @@ 
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS); virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); + +memset(s->as_by_bus_num, 0,