On Wed, Feb 16, 2022 at 10:52:47AM +0800, Lu Baolu wrote:
The common iommu_ops is hooked to both device and domain. When a helper
has both device and domain pointer, the way to get the iommu_ops looks
messy in iommu core. This sorts out the way to get iommu_ops. The device
related helpers go through device pointer, while the domain related ones
go through domain pointer.

Signed-off-by: Lu Baolu <baolu...@linux.intel.com>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Reviewed-by: Jason Gunthorpe <j...@nvidia.com>
---
include/linux/iommu.h | 11 ++++++++++
drivers/iommu/iommu.c | 50 +++++++++++++++++++++----------------------
2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9ffa2e88f3d0..90f1b5e3809d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -381,6 +381,17 @@ static inline void iommu_iotlb_gather_init(struct 
iommu_iotlb_gather *gather)
        };
}

+static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
+{
+       /*
+        * Assume that valid ops must be installed if iommu_probe_device()
+        * has succeeded. The device ops are essentially for internal use
+        * within the IOMMU subsystem itself, so we should be able to trust
+        * ourselves not to misuse the helper.
+        */
+       return dev->iommu->iommu_dev->ops;
+}
+
#define IOMMU_GROUP_NOTIFY_ADD_DEVICE           1 /* Device added */
#define IOMMU_GROUP_NOTIFY_DEL_DEVICE           2 /* Pre Device removed */
#define IOMMU_GROUP_NOTIFY_BIND_DRIVER          3 /* Pre Driver bind */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7cf073c56036..7af0ee670deb 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -323,13 +323,14 @@ int iommu_probe_device(struct device *dev)

void iommu_release_device(struct device *dev)
{
-       const struct iommu_ops *ops = dev->bus->iommu_ops;
+       const struct iommu_ops *ops;

        if (!dev->iommu)
                return;

        iommu_device_unlink(dev->iommu->iommu_dev, dev);

+       ops = dev_iommu_ops(dev);
        ops->release_device(dev);

        iommu_group_remove_device(dev);
@@ -833,8 +834,10 @@ static int iommu_create_device_direct_mappings(struct 
iommu_group *group,
static bool iommu_is_attach_deferred(struct iommu_domain *domain,
                                     struct device *dev)
{
-       if (domain->ops->is_attach_deferred)
-               return domain->ops->is_attach_deferred(domain, dev);
+       const struct iommu_ops *ops = dev_iommu_ops(dev);
+
+       if (ops->is_attach_deferred)
+               return ops->is_attach_deferred(domain, dev);

        return false;
}
@@ -1252,10 +1255,10 @@ int iommu_page_response(struct device *dev,
        struct iommu_fault_event *evt;
        struct iommu_fault_page_request *prm;
        struct dev_iommu *param = dev->iommu;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);
        bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
-       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

-       if (!domain || !domain->ops->page_response)
+       if (!ops->page_response)
                return -ENODEV;

        if (!param || !param->fault_param)
@@ -1296,7 +1299,7 @@ int iommu_page_response(struct device *dev,
                        msg->pasid = 0;
                }

-               ret = domain->ops->page_response(dev, evt, msg);
+               ret = ops->page_response(dev, evt, msg);
                list_del(&evt->list);
                kfree(evt);
                break;
@@ -1521,7 +1524,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group);

static int iommu_get_def_domain_type(struct device *dev)
{
-       const struct iommu_ops *ops = dev->bus->iommu_ops;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);

        if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
                return IOMMU_DOMAIN_DMA;
@@ -1580,7 +1583,7 @@ static int iommu_alloc_default_domain(struct iommu_group 
*group,
 */
static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
-       const struct iommu_ops *ops = dev->bus->iommu_ops;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);
        struct iommu_group *group;
        int ret;

@@ -1588,9 +1591,6 @@ static struct iommu_group *iommu_group_get_for_dev(struct 
device *dev)
        if (group)
                return group;

-       if (!ops)
-               return ERR_PTR(-EINVAL);
-
        group = ops->device_group(dev);
        if (WARN_ON_ONCE(group == NULL))
                return ERR_PTR(-EINVAL);
@@ -1759,10 +1759,10 @@ static int __iommu_group_dma_attach(struct iommu_group 
*group)

static int iommu_group_do_probe_finalize(struct device *dev, void *data)
{
-       struct iommu_domain *domain = data;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);

-       if (domain->ops->probe_finalize)
-               domain->ops->probe_finalize(dev);
+       if (ops->probe_finalize)
+               ops->probe_finalize(dev);

        return 0;
}
@@ -2020,7 +2020,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);

int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
{
-       const struct iommu_ops *ops = domain->ops;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);

        if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev))
                return __iommu_attach_device(domain, dev);
@@ -2579,17 +2579,17 @@ EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);

void iommu_get_resv_regions(struct device *dev, struct list_head *list)
{
-       const struct iommu_ops *ops = dev->bus->iommu_ops;
+       const struct iommu_ops *ops = dev_iommu_ops(dev);

Hi,

I'm getting panics after hunk above was applied in this patch
on ppc64le KVM guest, dev->iommu is NULL.

Can be reproduced with LTP read_all_sys test or just cat on following file:
# cat /sys/kernel/iommu_groups/0/reserved_regions

[20065.322087] BUG: Kernel NULL pointer dereference on read at 0x000000b8 [20065.323563] Faulting instruction address: 0xc000000000cb7c1c [20065.323625] Oops: Kernel access of bad area, sig: 11 [#1] [20065.323671] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries [20065.323729] Modules linked in: kvm_pr kvm vfio_iommu_spapr_tce vfio_spapr_eeh vfio vhost_net tap vhost_vsock vhost vhost_iotlb snd_seq_dummy dummy msdos minix binfmt_misc vcan can_raw nfsv3 nfs_acl nfs lockd grace fscache netfs tun brd overlay exfat vfat fat vsock_loopback vmw_vsock_virtio_transport_common vsock can_bcm can veth n_gsm pps_ldisc ppp_synctty mkiss ax25 ppp_async ppp_generic serport slcan slip slhc loop snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore pcrypt crypto_user n_hdlc tls rfkill sunrpc joydev virtio_net net_failover failover virtio_console virtio_balloon crct10dif_vpmsum fuse zram xfs virtio_blk vmx_crypto crc32c_vpmsum ipmi_devintf ipmi_msghandler [last unloaded: vcan] [20065.324308] CPU: 3 PID: 119528 Comm: read_all Not tainted 5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le #1 [20065.324402] NIP: c000000000cb7c1c LR: c000000000cb7ba0 CTR: 0000000000000000 [20065.324468] REGS: c00000012a3e3660 TRAP: 0300 Not tainted (5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le) [20065.324560] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44008804 XER: 00000000 [20065.324638] CFAR: 0000000000002674 DAR: 00000000000000b8 DSISR: 40000000 IRQMASK: 0 [20065.324638] GPR00: c000000000cb7ba0 c00000012a3e3900 c000000002b5a500 c00000000cd610b0 [20065.324638] GPR04: c00000000c3bb0c8 0000000000000004 c00000012a3e37ac 000000023c480000 [20065.324638] GPR08: 000000023c480000 0000000000000000 0000000000000000 a0f18a8800000000 [20065.324638] GPR12: 0000000084008800 c00000003fffae80 0000000000000000 0000000000000000 [20065.324638] GPR16: 0000000010060878 0000000000000008 00000000100607b8 c00000000c3bb058 [20065.324638] GPR20: c00000000c3bb048 c00000000a034fe0 5deadbeef0000100 5deadbeef0000122 [20065.324638] GPR24: fffffffffffff000 c00000012a3e3920 0000000000000000 c00000012a3e3930 [20065.324638] GPR28: c00000012a3e3a20 c00000000cf74c00 c0000000014ab060 c00000001a765ef0 [20065.325248] NIP [c000000000cb7c1c] iommu_get_group_resv_regions+0xcc/0x490 [20065.325310] LR [c000000000cb7ba0] iommu_get_group_resv_regions+0x50/0x490 [20065.325368] Call Trace: [20065.325391] [c00000012a3e3900] [c000000000cb7ba0] iommu_get_group_resv_regions+0x50/0x490 (unreliable) [20065.325474] [c00000012a3e39c0] [c000000000cb8024] iommu_group_show_resv_regions+0x44/0x140 [20065.325544] [c00000012a3e3a70] [c000000000cb5478] iommu_group_attr_show+0x38/0x60 [20065.325616] [c00000012a3e3a90] [c00000000070536c] sysfs_kf_seq_show+0xbc/0x1d0 [20065.325686] [c00000012a3e3b20] [c000000000702124] kernfs_seq_show+0x44/0x60 [20065.325746] [c00000012a3e3b40] [c00000000061bd2c] seq_read_iter+0x26c/0x6e0 [20065.325805] [c00000012a3e3c20] [c000000000702fd4] kernfs_fop_read_iter+0x254/0x2e0 [20065.325877] [c00000012a3e3c70] [c0000000005cf3d0] new_sync_read+0x110/0x170 [20065.325938] [c00000012a3e3d10] [c0000000005d2770] vfs_read+0x1d0/0x240 [20065.326002] [c00000012a3e3d60] [c0000000005d2ee8] ksys_read+0x78/0x130 [20065.326063] [c00000012a3e3db0] [c0000000000303f8] system_call_exception+0x198/0x3a0 [20065.326133] [c00000012a3e3e10] [c00000000000c64c] system_call_common+0xec/0x250 [20065.326205] --- interrupt: c00 at 0x7fff849dda64 [20065.326253] NIP: 00007fff849dda64 LR: 00000000100124a4 CTR: 0000000000000000 [20065.326319] REGS: c00000012a3e3e80 TRAP: 0c00 Not tainted (5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le) [20065.326408] MSR: 800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28002802 XER: 00000000 [20065.326501] IRQMASK: 0 [20065.326501] GPR00: 0000000000000003 00007ffff1cb8fd0 00007fff84af6e00 0000000000000003 [20065.326501] GPR04: 00007ffff1cb9070 00000000000003ff 0000000000000000 0000000000000000 [20065.326501] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [20065.326501] GPR12: 0000000000000000 00007fff84bca640 0000000000000000 0000000000000000 [20065.326501] GPR16: 0000000010060878 0000000000000008 00000000100607b8 0000000010044cc0 [20065.326501] GPR20: 0000000000000000 0000000010044640 00007ffff1cb949a 00000000100404c8 [20065.326501] GPR24: 0000000010040140 0000000010060660 0000000010040110 0000000010040020 [20065.326501] GPR28: 0000000010060698 00007fff84880000 00007ffff1cb909b 0000000000000003 [20065.327053] NIP [00007fff849dda64] 0x7fff849dda64 [20065.327099] LR [00000000100124a4] 0x100124a4 [20065.327144] --- interrupt: c00 [20065.327179] Instruction dump: [20065.327214] 66f7f000 fbc100b0 fb210088 62d60100 3b210020 fb610098 62f70122 3b610030 [20065.327289] e8750010 fb210020 fb210028 e9230560 <e92900b8> e9290010 e9890030 2c2c0000 [20065.327367] ---[ end trace 0000000000000000 ]---
# ll /sys/kernel/iommu_groups/0/devices/
total 0
lrwxrwxrwx. 1 root root 0 May  4 03:08 0000:00:01.0 -> 
../../../../devices/pci0000:00/0000:00:01.0
lrwxrwxrwx. 1 root root 0 May  4 03:08 0000:00:02.0 -> 
../../../../devices/pci0000:00/0000:00:02.0
lrwxrwxrwx. 1 root root 0 May  4 03:08 0000:00:03.0 -> 
../../../../devices/pci0000:00/0000:00:03.0
lrwxrwxrwx. 1 root root 0 May  4 03:08 0000:00:04.0 -> 
../../../../devices/pci0000:00/0000:00:04.0

# lspci -v
00:01.0 Ethernet controller: Red Hat, Inc. Virtio network device
       Subsystem: Red Hat, Inc. Device 0001
       Device tree node: 
/sys/firmware/devicetree/base/pci@800000020000000/ethernet@1
       Flags: bus master, fast devsel, latency 0, IRQ 20, IOMMU group 0
       I/O ports at 0080 [size=32]
       Memory at 100b0002000 (32-bit, non-prefetchable) [size=4K]
       Expansion ROM at 100b0040000 [disabled] [size=256K]
       Capabilities: [40] MSI-X: Enable+ Count=3 Masked-
       Kernel driver in use: virtio-pci

00:02.0 USB controller: Apple Inc. KeyLargo/Intrepid USB (prog-if 10 [OHCI])
       Subsystem: Red Hat, Inc. QEMU Virtual Machine
       Device tree node: /sys/firmware/devicetree/base/pci@800000020000000/usb@2
       Flags: bus master, fast devsel, latency 0, IRQ 19, IOMMU group 0
       Memory at 100b0001000 (32-bit, non-prefetchable) [size=256]
       Kernel driver in use: ohci-pci

00:03.0 SCSI storage controller: Red Hat, Inc. Virtio block device
       Subsystem: Red Hat, Inc. Device 0002
       Device tree node: 
/sys/firmware/devicetree/base/pci@800000020000000/scsi@3
       Flags: bus master, fast devsel, latency 0, IRQ 18, IOMMU group 0
       I/O ports at 0040 [size=64]
       Memory at 100b0000000 (32-bit, non-prefetchable) [size=4K]
       Capabilities: [40] MSI-X: Enable+ Count=2 Masked-
       Kernel driver in use: virtio-pci

00:04.0 Unclassified device [00ff]: Red Hat, Inc. Virtio memory balloon
       Subsystem: Red Hat, Inc. Device 0005
       Device tree node: 
/sys/firmware/devicetree/base/pci@800000020000000/unknown-legacy-device@4
       Flags: bus master, fast devsel, latency 0, IRQ 17, IOMMU group 0
       I/O ports at 0020 [size=32]
       Kernel driver in use: virtio-pci

full console log from CKI test run:
 
https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/530073525/test%20ppc64le/2407075056/artifacts/test_tasks/recipes/11913490/logs/console.log
kernel config:
 
https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/530073525/publish%20ppc64le/2407075021/artifacts/kernel-ppc64le.config

Regards,
Jan

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to