Re: [PATCH v14 08/13] dma-iommu: Implement NESTED_MSI cookie
Hi Zenghui,

On 4/7/21 9:39 AM, Zenghui Yu wrote:
> Hi Eric,
>
> On 2021/2/24 4:56, Eric Auger wrote:
>> Up to now, when the type was UNMANAGED, we used to
>> allocate IOVA pages within a reserved IOVA MSI range.
>>
>> If both the host and the guest are exposed with SMMUs, each
>> would allocate an IOVA. The guest allocates an IOVA (gIOVA)
>> to map onto the guest MSI doorbell (gDB). The host allocates
>> another IOVA (hIOVA) to map onto the physical doorbell (hDB).
>>
>> So we end up with 2 unrelated mappings, at S1 and S2:
>>          S1            S2
>>  gIOVA    ->    gDB
>>                 hIOVA   ->    hDB
>>
>> The PCI device would be programmed with hIOVA.
>> No stage 1 mapping would exist, causing the MSIs to fault.
>>
>> iommu_dma_bind_guest_msi() allows passing gIOVA/gDB
>> to the host so that gIOVA can be used by the host instead of
>> re-allocating a new hIOVA.
>>
>>          S1            S2
>>  gIOVA    ->    gDB    ->    hDB
>>
>> This time, the PCI device can be programmed with the gIOVA MSI
>> doorbell which is correctly mapped through both stages.
>>
>> Nested mode is not compatible with HW MSI regions as in that
>> case gDB and hDB should have a 1-1 mapping. This check will
>> be done when attaching each device to the IOMMU domain.
>>
>> Signed-off-by: Eric Auger
>
> [...]
>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index f659395e7959..d25eb7cecaa7 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -19,6 +19,7 @@
>>  #include
>>  #include
>>  #include
>> +#include
>
> Duplicated include.

Sure.

>>  #include
>>  #include
>>  #include
>> @@ -29,12 +30,15 @@
>>  struct iommu_dma_msi_page {
>>          struct list_head        list;
>>          dma_addr_t              iova;
>> +        dma_addr_t              gpa;
>>          phys_addr_t             phys;
>> +        size_t                  s1_granule;
>>  };
>>
>>  enum iommu_dma_cookie_type {
>>          IOMMU_DMA_IOVA_COOKIE,
>>          IOMMU_DMA_MSI_COOKIE,
>> +        IOMMU_DMA_NESTED_MSI_COOKIE,
>>  };
>>
>>  struct iommu_dma_cookie {
>> @@ -46,6 +50,7 @@ struct iommu_dma_cookie {
>>                  dma_addr_t      msi_iova;
>
> msi_iova is unused in the nested mode, but we still set it to the start
> address of the RESV_SW_MSI region (in iommu_get_msi_cookie()), which
> looks a bit strange to me.

I agree with you.

>>          };
>>          struct list_head        msi_page_list;
>> +        spinlock_t              msi_lock;
>
> Should msi_lock be grabbed everywhere msi_page_list is populated?
> Especially in iommu_dma_get_msi_page(), which can be invoked from the
> irqchip driver.

Yes, I agree.

>>
>>          /* Domain for flush queue callback; NULL if flush queue not in use */
>>          struct iommu_domain     *fq_domain;
>> @@ -87,6 +92,7 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
>>
>>          cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
>>          if (cookie) {
>> +                spin_lock_init(&cookie->msi_lock);
>>                  INIT_LIST_HEAD(&cookie->msi_page_list);
>>                  cookie->type = type;
>>          }
>> @@ -120,14 +126,17 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
>>   *
>>   * Users who manage their own IOVA allocation and do not want DMA API support,
>>   * but would still like to take advantage of automatic MSI remapping, can use
>> - * this to initialise their own domain appropriately. Users should reserve a
>> + * this to initialise their own domain appropriately. Users may reserve a
>>   * contiguous IOVA region, starting at @base, large enough to accommodate the
>>   * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
>> - * used by the devices attached to @domain.
>> + * used by the devices attached to @domain. The other way round is to provide
>> + * usable iova pages through the iommu_dma_bind_doorbell API (nested stages
>
> s/iommu_dma_bind_doorbell/iommu_dma_bind_guest_msi/ ?

Correct.

>> + * use case)
>>   */
>>  int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>>  {
>>          struct iommu_dma_cookie *cookie;
>> +        int nesting, ret;
>>
>>          if (domain->type != IOMMU_DOMAIN_UNMANAGED)
>>                  return -EINVAL;
>> @@ -135,7 +144,12 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>>          if (domain->iova_cookie)
>>                  return -EEXIST;
>>
>> -        cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
>> +        ret = iommu_domain_get_attr(domain,  DOMAIN_ATTR_NESTING, &nesting);
>
> Redundant space.

Yep.

>> +        if (!ret && nesting)
>> +                cookie = cookie_alloc(IOMMU_DMA_NESTED_MSI_COOKIE);
>> +        else
>> +                cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
>> +
>>          if (!cookie)
>>                  return -ENOMEM;
>> @@ -156,6 +170,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
>>  {
>>          struct iommu_dma_cookie *cookie = domain->iova_cookie;
>>          struct iommu_dma_msi_page *msi, *tmp;
>> +        bool s2_unmap = false;
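For reference, a minimal sketch of the kind of locking agreed on above: taking
msi_lock around the msi_page_list walk so it cannot race with a concurrent
list_add(). This is an illustration only, not the actual fix; the helper name
iommu_dma_find_msi_page() is hypothetical and the code assumes the existing
struct iommu_dma_cookie / struct iommu_dma_msi_page definitions from
dma-iommu.c.

static struct iommu_dma_msi_page *
iommu_dma_find_msi_page(struct iommu_dma_cookie *cookie, phys_addr_t msi_addr)
{
        struct iommu_dma_msi_page *msi_page, *found = NULL;

        /* Serialize against concurrent insertions into msi_page_list */
        spin_lock(&cookie->msi_lock);
        list_for_each_entry(msi_page, &cookie->msi_page_list, list) {
                if (msi_page->phys == msi_addr) {
                        found = msi_page;
                        break;
                }
        }
        spin_unlock(&cookie->msi_lock);

        return found;
}

Since iommu_dma_get_msi_page() can be reached from irqchip code, a real
implementation would also have to decide whether the irqsave variant of the
lock is needed on that path.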
Re: [PATCH v14 08/13] dma-iommu: Implement NESTED_MSI cookie
Hi Eric,

On 2021/2/24 4:56, Eric Auger wrote:
> Up to now, when the type was UNMANAGED, we used to
> allocate IOVA pages within a reserved IOVA MSI range.
>
> If both the host and the guest are exposed with SMMUs, each
> would allocate an IOVA. The guest allocates an IOVA (gIOVA)
> to map onto the guest MSI doorbell (gDB). The host allocates
> another IOVA (hIOVA) to map onto the physical doorbell (hDB).
>
> So we end up with 2 unrelated mappings, at S1 and S2:
>          S1            S2
>  gIOVA    ->    gDB
>                 hIOVA   ->    hDB
>
> The PCI device would be programmed with hIOVA.
> No stage 1 mapping would exist, causing the MSIs to fault.
>
> iommu_dma_bind_guest_msi() allows passing gIOVA/gDB
> to the host so that gIOVA can be used by the host instead of
> re-allocating a new hIOVA.
>
>          S1            S2
>  gIOVA    ->    gDB    ->    hDB
>
> This time, the PCI device can be programmed with the gIOVA MSI
> doorbell which is correctly mapped through both stages.
>
> Nested mode is not compatible with HW MSI regions as in that
> case gDB and hDB should have a 1-1 mapping. This check will
> be done when attaching each device to the IOMMU domain.
>
> Signed-off-by: Eric Auger

[...]

> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index f659395e7959..d25eb7cecaa7 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -19,6 +19,7 @@
>  #include
>  #include
>  #include
> +#include

Duplicated include.

>  #include
>  #include
>  #include
> @@ -29,12 +30,15 @@
>  struct iommu_dma_msi_page {
>          struct list_head        list;
>          dma_addr_t              iova;
> +        dma_addr_t              gpa;
>          phys_addr_t             phys;
> +        size_t                  s1_granule;
>  };
>
>  enum iommu_dma_cookie_type {
>          IOMMU_DMA_IOVA_COOKIE,
>          IOMMU_DMA_MSI_COOKIE,
> +        IOMMU_DMA_NESTED_MSI_COOKIE,
>  };
>
>  struct iommu_dma_cookie {
> @@ -46,6 +50,7 @@ struct iommu_dma_cookie {
>                  dma_addr_t      msi_iova;

msi_iova is unused in the nested mode, but we still set it to the start
address of the RESV_SW_MSI region (in iommu_get_msi_cookie()), which
looks a bit strange to me.

>          };
>          struct list_head        msi_page_list;
> +        spinlock_t              msi_lock;

Should msi_lock be grabbed everywhere msi_page_list is populated?
Especially in iommu_dma_get_msi_page(), which can be invoked from the
irqchip driver.

>
>          /* Domain for flush queue callback; NULL if flush queue not in use */
>          struct iommu_domain     *fq_domain;
> @@ -87,6 +92,7 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
>
>          cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
>          if (cookie) {
> +                spin_lock_init(&cookie->msi_lock);
>                  INIT_LIST_HEAD(&cookie->msi_page_list);
>                  cookie->type = type;
>          }
> @@ -120,14 +126,17 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
>   *
>   * Users who manage their own IOVA allocation and do not want DMA API support,
>   * but would still like to take advantage of automatic MSI remapping, can use
> - * this to initialise their own domain appropriately. Users should reserve a
> + * this to initialise their own domain appropriately. Users may reserve a
>   * contiguous IOVA region, starting at @base, large enough to accommodate the
>   * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
> - * used by the devices attached to @domain.
> + * used by the devices attached to @domain. The other way round is to provide
> + * usable iova pages through the iommu_dma_bind_doorbell API (nested stages

s/iommu_dma_bind_doorbell/iommu_dma_bind_guest_msi/ ?

> + * use case)
>   */
>  int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>  {
>          struct iommu_dma_cookie *cookie;
> +        int nesting, ret;
>
>          if (domain->type != IOMMU_DOMAIN_UNMANAGED)
>                  return -EINVAL;
> @@ -135,7 +144,12 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>          if (domain->iova_cookie)
>                  return -EEXIST;
>
> -        cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
> +        ret = iommu_domain_get_attr(domain,  DOMAIN_ATTR_NESTING, &nesting);

Redundant space.

> +        if (!ret && nesting)
> +                cookie = cookie_alloc(IOMMU_DMA_NESTED_MSI_COOKIE);
> +        else
> +                cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
> +
>          if (!cookie)
>                  return -ENOMEM;
> @@ -156,6 +170,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
>  {
>          struct iommu_dma_cookie *cookie = domain->iova_cookie;
>          struct iommu_dma_msi_page *msi, *tmp;
> +        bool s2_unmap = false;
>
>          if (!cookie)
>                  return;
> @@ -163,7 +178,15 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
>          if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
>                  put_iova_domain(&cookie->iovad);
>
> +        if (cookie->type == IOMMU_DMA_NESTED_MSI_COOKIE)
> +
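To make the registration path easier to picture, here is a rough, hypothetical
sketch of what the gIOVA/gDB binding could look like, based only on the commit
message and the structures quoted above; it is not the actual patch code, and
the duplicate-detection and allocation details are illustrative assumptions.

int iommu_dma_bind_guest_msi(struct iommu_domain *domain,
                             dma_addr_t giova, phys_addr_t gpa, size_t size)
{
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
        struct iommu_dma_msi_page *msi;
        int ret = 0;

        if (!cookie || cookie->type != IOMMU_DMA_NESTED_MSI_COOKIE)
                return -EINVAL;

        spin_lock(&cookie->msi_lock);
        list_for_each_entry(msi, &cookie->msi_page_list, list) {
                if (msi->iova == giova)
                        goto unlock;    /* this gIOVA is already registered */
        }

        /* Record the stage-1 gIOVA -> gDB binding for later S2 setup */
        msi = kzalloc(sizeof(*msi), GFP_ATOMIC);
        if (!msi) {
                ret = -ENOMEM;
                goto unlock;
        }
        msi->iova = giova;
        msi->gpa = gpa;
        msi->s1_granule = size;
        list_add(&msi->list, &cookie->msi_page_list);
unlock:
        spin_unlock(&cookie->msi_lock);
        return ret;
}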
[PATCH v14 08/13] dma-iommu: Implement NESTED_MSI cookie
Up to now, when the type was UNMANAGED, we used to
allocate IOVA pages within a reserved IOVA MSI range.

If both the host and the guest are exposed with SMMUs, each
would allocate an IOVA. The guest allocates an IOVA (gIOVA)
to map onto the guest MSI doorbell (gDB). The host allocates
another IOVA (hIOVA) to map onto the physical doorbell (hDB).

So we end up with 2 unrelated mappings, at S1 and S2:
         S1            S2
 gIOVA    ->    gDB
                hIOVA   ->    hDB

The PCI device would be programmed with hIOVA.
No stage 1 mapping would exist, causing the MSIs to fault.

iommu_dma_bind_guest_msi() allows passing gIOVA/gDB
to the host so that gIOVA can be used by the host instead of
re-allocating a new hIOVA.

         S1            S2
 gIOVA    ->    gDB    ->    hDB

This time, the PCI device can be programmed with the gIOVA MSI
doorbell which is correctly mapped through both stages.

Nested mode is not compatible with HW MSI regions as in that
case gDB and hDB should have a 1-1 mapping. This check will
be done when attaching each device to the IOMMU domain.

Signed-off-by: Eric Auger

---

v10 -> v11:
- fix compilation if !CONFIG_IOMMU_DMA

v7 -> v8:
- correct iommu_dma_(un)bind_guest_msi when !CONFIG_IOMMU_DMA
- mentioned nested mode is not compatible with HW MSI regions
  in the commit message
- protect with msi_lock on unbind

v6 -> v7:
- removed device handle

v3 -> v4:
- change function names; add unregister
- protect with msi_lock

v2 -> v3:
- also store the device handle on S1 mapping registration. This
  guarantees that the associated S2 mapping binds to the correct
  physical MSI controller.

v1 -> v2:
- unmap stage2 on put()
---
 drivers/iommu/dma-iommu.c | 142 +-
 include/linux/dma-iommu.h |  16 +
 2 files changed, 155 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f659395e7959..d25eb7cecaa7 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -29,12 +30,15 @@
 struct iommu_dma_msi_page {
         struct list_head        list;
         dma_addr_t              iova;
+        dma_addr_t              gpa;
         phys_addr_t             phys;
+        size_t                  s1_granule;
 };

 enum iommu_dma_cookie_type {
         IOMMU_DMA_IOVA_COOKIE,
         IOMMU_DMA_MSI_COOKIE,
+        IOMMU_DMA_NESTED_MSI_COOKIE,
 };

 struct iommu_dma_cookie {
@@ -46,6 +50,7 @@ struct iommu_dma_cookie {
                 dma_addr_t      msi_iova;
         };
         struct list_head        msi_page_list;
+        spinlock_t              msi_lock;

         /* Domain for flush queue callback; NULL if flush queue not in use */
         struct iommu_domain     *fq_domain;
@@ -87,6 +92,7 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)

         cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
         if (cookie) {
+                spin_lock_init(&cookie->msi_lock);
                 INIT_LIST_HEAD(&cookie->msi_page_list);
                 cookie->type = type;
         }
@@ -120,14 +126,17 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
  *
  * Users who manage their own IOVA allocation and do not want DMA API support,
  * but would still like to take advantage of automatic MSI remapping, can use
- * this to initialise their own domain appropriately. Users should reserve a
+ * this to initialise their own domain appropriately. Users may reserve a
  * contiguous IOVA region, starting at @base, large enough to accommodate the
  * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
- * used by the devices attached to @domain.
+ * used by the devices attached to @domain. The other way round is to provide
+ * usable iova pages through the iommu_dma_bind_doorbell API (nested stages
+ * use case)
  */
 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 {
         struct iommu_dma_cookie *cookie;
+        int nesting, ret;

         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
                 return -EINVAL;
@@ -135,7 +144,12 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
         if (domain->iova_cookie)
                 return -EEXIST;

-        cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+        ret = iommu_domain_get_attr(domain,  DOMAIN_ATTR_NESTING, &nesting);
+        if (!ret && nesting)
+                cookie = cookie_alloc(IOMMU_DMA_NESTED_MSI_COOKIE);
+        else
+                cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+
         if (!cookie)
                 return -ENOMEM;
@@ -156,6 +170,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         struct iommu_dma_msi_page *msi, *tmp;
+        bool s2_unmap = false;

         if (!cookie)
                 return;
@@ -163,7 +178,15 @@ void iommu
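Finally, a rough sketch of how the consumer side of such a cookie could work
when the MSI is composed: instead of allocating a new host IOVA, look up a
gIOVA/gDB pair registered via iommu_dma_bind_guest_msi() and install the
missing stage-2 mapping gDB -> hDB. The function name
iommu_dma_get_nested_msi_page() and its exact lookup logic are illustrative
assumptions, not taken from the patch.

static struct iommu_dma_msi_page *
iommu_dma_get_nested_msi_page(struct iommu_domain *domain,
                              phys_addr_t msi_addr /* hDB */)
{
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
        struct iommu_dma_msi_page *iter, *msi_page = NULL;
        size_t size = 0;
        int ret;

        /* Find a registered gIOVA/gDB entry, unbound or bound to this hDB */
        spin_lock(&cookie->msi_lock);
        list_for_each_entry(iter, &cookie->msi_page_list, list) {
                if (!iter->phys || iter->phys == msi_addr) {
                        msi_page = iter;
                        size = iter->s1_granule;
                        break;
                }
        }
        spin_unlock(&cookie->msi_lock);

        if (!msi_page)
                return NULL;

        /* Stage-2 mapping already installed for this doorbell */
        if (msi_page->phys == msi_addr)
                return msi_page;

        /* Map gDB -> hDB at stage 2, using the stage-1 granule as size */
        ret = iommu_map(domain, msi_page->gpa,
                        msi_addr & ~(phys_addr_t)(size - 1),
                        size, IOMMU_WRITE | IOMMU_MMIO);
        if (ret)
                return ERR_PTR(ret);

        msi_page->phys = msi_addr;
        return msi_page;
}

The MSI message would then be programmed with msi_page->iova (the gIOVA),
which is the point of the whole exercise: the same address is valid at stage 1
in the guest and resolves to hDB through the stage-2 mapping installed here.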