Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
>>> On 28.06.16 at 09:06, wrote: > On June 27, 2016 11:21 PM, Jan Beulich wrote: >> >>> On 27.06.16 at 14:56, wrote: >> > On June 27, 2016 4:24 PM, Jan Beulich wrote: >> >> >>> On 24.06.16 at 07:51, wrote: >> >> > @@ -199,24 +199,73 @@ static int __must_check >> >> queue_invalidate_wait(struct iommu *iommu, >> >> > return -EOPNOTSUPP; >> >> > } >> >> > >> >> > -static int __must_check invalidate_sync(struct iommu *iommu, >> >> > -bool_t flush_dev_iotlb) >> >> > +static int __must_check invalidate_sync(struct iommu *iommu) >> >> > { >> >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); >> >> > >> >> > ASSERT(qi_ctrl->qinval_maddr); >> >> > >> >> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); >> >> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); } >> >> > + >> >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, >> >> > + struct pci_dev *pdev) { >> >> > +struct domain *d = NULL; >> >> > + >> >> > +if ( test_bit(did, iommu->domid_bitmap) ) >> >> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]); >> >> > + >> >> > +/* >> >> > + * In case the domain has been freed or the IOMMU domid bitmap is >> >> > + * not valid, the device no longer belongs to this domain. >> >> > + */ >> >> > +if ( d == NULL ) >> >> > +return; >> >> > + >> >> > +pcidevs_lock(); >> >> > +ASSERT(pdev->domain); >> >> > +list_del(&pdev->domain_list); >> >> > +pdev->domain = NULL; >> >> > +pci_hide_existing_device(pdev); >> >> > +pcidevs_unlock(); >> >> > + >> >> > +if ( !d->is_shutting_down && printk_ratelimit() ) >> >> > +printk(XENLOG_WARNING VTDPREFIX >> >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", >> >> > + d->domain_id, pdev->seg, pdev->bus, >> >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); >> >> > + >> >> > +if ( !is_hardware_domain(d) ) >> >> > +domain_crash(d); >> >> > + >> >> > +rcu_unlock_domain(d); >> >> > +} >> >> >> >> So in an earlier patch in this series you (supposedly) moved similar >> >> logic up to the vendor independent layer. I think this then would >> >> better get moved up too, if at all possible. >> >> >> > >> > To be honest, I have not much reason for leaving domain crash here and >> > I was aware of this problem, but crash_domain() here is not harmful >> > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd- >> >is_shutting_down' >> > is Set then return in domain_shutdown() ). >> > In case crash domain directly, it may help us narrow down the 'window' >> > (the domain is still running).. >> > >> > To me, moving the logic up is acceptable. >> > >> > In next version, could I only drop: >> > >> > +if ( !is_hardware_domain(d) ) >> > +domain_crash(d); >> > >> > In this patch, and leave the rest as is ? >> >> Not really - the entire function looks like it could move out of vtd/, as I >> can't >> see anything VT-d specific in it. >> > > Yes, it could be out of vtd, and then benefit arm/amd IOMMU to hide ATS > device. > > But 'did' and 'iommu->domid_bitmap' are really vtd specific. Both of them > are > to get domain* structure, not a big deal, and then I can use domain_id > instead. > > IMO, the domain* structure is a must here, I agree, I did overlook that domain lookup. The common code function should be passed a struct domain *, as you suggest. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
On June 27, 2016 11:21 PM, Jan Beulich wrote: > >>> On 27.06.16 at 14:56, wrote: > > On June 27, 2016 4:24 PM, Jan Beulich wrote: > >> >>> On 24.06.16 at 07:51, wrote: > >> > @@ -199,24 +199,73 @@ static int __must_check > >> queue_invalidate_wait(struct iommu *iommu, > >> > return -EOPNOTSUPP; > >> > } > >> > > >> > -static int __must_check invalidate_sync(struct iommu *iommu, > >> > -bool_t flush_dev_iotlb) > >> > +static int __must_check invalidate_sync(struct iommu *iommu) > >> > { > >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > >> > > >> > ASSERT(qi_ctrl->qinval_maddr); > >> > > >> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > >> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); } > >> > + > >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > >> > + struct pci_dev *pdev) { > >> > +struct domain *d = NULL; > >> > + > >> > +if ( test_bit(did, iommu->domid_bitmap) ) > >> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]); > >> > + > >> > +/* > >> > + * In case the domain has been freed or the IOMMU domid bitmap is > >> > + * not valid, the device no longer belongs to this domain. > >> > + */ > >> > +if ( d == NULL ) > >> > +return; > >> > + > >> > +pcidevs_lock(); > >> > +ASSERT(pdev->domain); > >> > +list_del(&pdev->domain_list); > >> > +pdev->domain = NULL; > >> > +pci_hide_existing_device(pdev); > >> > +pcidevs_unlock(); > >> > + > >> > +if ( !d->is_shutting_down && printk_ratelimit() ) > >> > +printk(XENLOG_WARNING VTDPREFIX > >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > >> > + d->domain_id, pdev->seg, pdev->bus, > >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > >> > + > >> > +if ( !is_hardware_domain(d) ) > >> > +domain_crash(d); > >> > + > >> > +rcu_unlock_domain(d); > >> > +} > >> > >> So in an earlier patch in this series you (supposedly) moved similar > >> logic up to the vendor independent layer. I think this then would > >> better get moved up too, if at all possible. > >> > > > > To be honest, I have not much reason for leaving domain crash here and > > I was aware of this problem, but crash_domain() here is not harmful > > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd- > >is_shutting_down' > > is Set then return in domain_shutdown() ). > > In case crash domain directly, it may help us narrow down the 'window' > > (the domain is still running).. > > > > To me, moving the logic up is acceptable. > > > > In next version, could I only drop: > > > > +if ( !is_hardware_domain(d) ) > > +domain_crash(d); > > > > In this patch, and leave the rest as is ? > > Not really - the entire function looks like it could move out of vtd/, as I > can't > see anything VT-d specific in it. > Yes, it could be out of vtd, and then benefit arm/amd IOMMU to hide ATS device. But 'did' and 'iommu->domid_bitmap' are really vtd specific. Both of them are to get domain* structure, not a big deal, and then I can use domain_id instead. IMO, the domain* structure is a must here, As mentioned, not all of call trees of device iotlb flush are under pcidevs_lock, (.i.e ...--iommu_iotlb_flush()-- xenmem_add_to_physmap()... ) In extreme cases, the domain may has been freed or the device may has been detached or even attached to another domain ( I also need to add 'if (pdev->domain == d )' before to hide device). the domain* structure can help us check above cases. Quan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
>>> On 27.06.16 at 14:56, wrote: > On June 27, 2016 4:24 PM, Jan Beulich wrote: >> >>> On 24.06.16 at 07:51, wrote: >> > @@ -199,24 +199,73 @@ static int __must_check >> queue_invalidate_wait(struct iommu *iommu, >> > return -EOPNOTSUPP; >> > } >> > >> > -static int __must_check invalidate_sync(struct iommu *iommu, >> > -bool_t flush_dev_iotlb) >> > +static int __must_check invalidate_sync(struct iommu *iommu) >> > { >> > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); >> > >> > ASSERT(qi_ctrl->qinval_maddr); >> > >> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); >> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); } >> > + >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, >> > + struct pci_dev *pdev) { >> > +struct domain *d = NULL; >> > + >> > +if ( test_bit(did, iommu->domid_bitmap) ) >> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]); >> > + >> > +/* >> > + * In case the domain has been freed or the IOMMU domid bitmap is >> > + * not valid, the device no longer belongs to this domain. >> > + */ >> > +if ( d == NULL ) >> > +return; >> > + >> > +pcidevs_lock(); >> > +ASSERT(pdev->domain); >> > +list_del(&pdev->domain_list); >> > +pdev->domain = NULL; >> > +pci_hide_existing_device(pdev); >> > +pcidevs_unlock(); >> > + >> > +if ( !d->is_shutting_down && printk_ratelimit() ) >> > +printk(XENLOG_WARNING VTDPREFIX >> > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", >> > + d->domain_id, pdev->seg, pdev->bus, >> > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); >> > + >> > +if ( !is_hardware_domain(d) ) >> > +domain_crash(d); >> > + >> > +rcu_unlock_domain(d); >> > +} >> >> So in an earlier patch in this series you (supposedly) moved similar logic >> up to >> the vendor independent layer. I think this then would better get moved up >> too, if at all possible. >> > > To be honest, I have not much reason for leaving domain crash here and I was > aware of this problem, but crash_domain() here is not harmful (as the > 'd->is_shutting_down' is Set when to crash, and once the > 'd->is_shutting_down' > is Set then return in domain_shutdown() ). > In case crash domain directly, it may help us narrow down the 'window' (the > domain is still running).. > > To me, moving the logic up is acceptable. > > In next version, could I only drop: > > +if ( !is_hardware_domain(d) ) > +domain_crash(d); > > In this patch, and leave the rest as is ? Not really - the entire function looks like it could move out of vtd/, as I can't see anything VT-d specific in it. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
On June 27, 2016 4:24 PM, Jan Beulich wrote: > >>> On 24.06.16 at 07:51, wrote: > > @@ -199,24 +199,73 @@ static int __must_check > queue_invalidate_wait(struct iommu *iommu, > > return -EOPNOTSUPP; > > } > > > > -static int __must_check invalidate_sync(struct iommu *iommu, > > -bool_t flush_dev_iotlb) > > +static int __must_check invalidate_sync(struct iommu *iommu) > > { > > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > > > > ASSERT(qi_ctrl->qinval_maddr); > > > > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); } > > + > > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > > + struct pci_dev *pdev) { > > +struct domain *d = NULL; > > + > > +if ( test_bit(did, iommu->domid_bitmap) ) > > +d = rcu_lock_domain_by_id(iommu->domid_map[did]); > > + > > +/* > > + * In case the domain has been freed or the IOMMU domid bitmap is > > + * not valid, the device no longer belongs to this domain. > > + */ > > +if ( d == NULL ) > > +return; > > + > > +pcidevs_lock(); > > +ASSERT(pdev->domain); > > +list_del(&pdev->domain_list); > > +pdev->domain = NULL; > > +pci_hide_existing_device(pdev); > > +pcidevs_unlock(); > > + > > +if ( !d->is_shutting_down && printk_ratelimit() ) > > +printk(XENLOG_WARNING VTDPREFIX > > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > > + d->domain_id, pdev->seg, pdev->bus, > > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > > + > > +if ( !is_hardware_domain(d) ) > > +domain_crash(d); > > + > > +rcu_unlock_domain(d); > > +} > > So in an earlier patch in this series you (supposedly) moved similar logic up > to > the vendor independent layer. I think this then would better get moved up > too, if at all possible. > To be honest, I have not much reason for leaving domain crash here and I was aware of this problem, but crash_domain() here is not harmful (as the 'd->is_shutting_down' is Set when to crash, and once the 'd->is_shutting_down' is Set then return in domain_shutdown() ). In case crash domain directly, it may help us narrow down the 'window' (the domain is still running).. To me, moving the logic up is acceptable. In next version, could I only drop: +if ( !is_hardware_domain(d) ) +domain_crash(d); In this patch, and leave the rest as is ? Quan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
>>> On 24.06.16 at 07:51, wrote: > @@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct > iommu *iommu, > return -EOPNOTSUPP; > } > > -static int __must_check invalidate_sync(struct iommu *iommu, > -bool_t flush_dev_iotlb) > +static int __must_check invalidate_sync(struct iommu *iommu) > { > struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); > > ASSERT(qi_ctrl->qinval_maddr); > > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); > +} > + > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, > + struct pci_dev *pdev) > +{ > +struct domain *d = NULL; > + > +if ( test_bit(did, iommu->domid_bitmap) ) > +d = rcu_lock_domain_by_id(iommu->domid_map[did]); > + > +/* > + * In case the domain has been freed or the IOMMU domid bitmap is > + * not valid, the device no longer belongs to this domain. > + */ > +if ( d == NULL ) > +return; > + > +pcidevs_lock(); > +ASSERT(pdev->domain); > +list_del(&pdev->domain_list); > +pdev->domain = NULL; > +pci_hide_existing_device(pdev); > +pcidevs_unlock(); > + > +if ( !d->is_shutting_down && printk_ratelimit() ) > +printk(XENLOG_WARNING VTDPREFIX > + " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n", > + d->domain_id, pdev->seg, pdev->bus, > + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); > + > +if ( !is_hardware_domain(d) ) > +domain_crash(d); > + > +rcu_unlock_domain(d); > +} So in an earlier patch in this series you (supposedly) moved similar logic up to the vendor independent layer. I think this then would better get moved up too, if at all possible. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
>>> On 26.06.16 at 11:18, wrote: > On June 24, 2016 7:55 PM, Tian, Kevin wrote: >> > From: Xu, Quan >> > Sent: Friday, June 24, 2016 1:52 PM >> > diff --git a/xen/drivers/passthrough/vtd/extern.h >> > b/xen/drivers/passthrough/vtd/extern.h >> > index 45357f2..efaff28 100644 >> > --- a/xen/drivers/passthrough/vtd/extern.h >> > +++ b/xen/drivers/passthrough/vtd/extern.h >> > @@ -25,6 +25,7 @@ >> > >> > #define VTDPREFIX "[VT-D]" >> > >> > +struct pci_ats_dev; >> > extern bool_t rwbf_quirk; >> > >> > void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ >> > int dev_invalidate_iotlb(struct iommu *iommu, u16 did, >> > u64 addr, unsigned int size_order, u64 >> > type); >> > >> > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, >> > - u32 max_invs_pend, >> > - u16 sid, u16 size, u64 addr); >> > + struct pci_ats_dev *ats_dev, >> > + u16 did, u16 size, u64 >> > + addr); >> > >> > unsigned int get_cache_line_size(void); void cacheline_flush(char >> > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c >> > b/xen/drivers/passthrough/vtd/qinval.c >> > index 4492b29..e4e2771 100644 >> > --- a/xen/drivers/passthrough/vtd/qinval.c >> > +++ b/xen/drivers/passthrough/vtd/qinval.c >> > @@ -27,11 +27,11 @@ >> > #include "dmar.h" >> > #include "vtd.h" >> > #include "extern.h" >> > +#include "../ats.h" >> >> Earlier you said: >> >1. a forward declaration struct pci_ats_dev*, instead of >> > including ats.h. >> > > This context is 'in extern.h', but.. > >> But above you still have ats.h included. >> > > .. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is > used in this file. Used as in "a variable of this type, or a pointer to this type de-referenced", or only as in "a variable/parameter of pointer to this type declared"? In the latter case you don't need the include. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
On June 24, 2016 7:55 PM, Tian, Kevin wrote: > > From: Xu, Quan > > Sent: Friday, June 24, 2016 1:52 PM > > diff --git a/xen/drivers/passthrough/vtd/extern.h > > b/xen/drivers/passthrough/vtd/extern.h > > index 45357f2..efaff28 100644 > > --- a/xen/drivers/passthrough/vtd/extern.h > > +++ b/xen/drivers/passthrough/vtd/extern.h > > @@ -25,6 +25,7 @@ > > > > #define VTDPREFIX "[VT-D]" > > > > +struct pci_ats_dev; > > extern bool_t rwbf_quirk; > > > > void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ > > int dev_invalidate_iotlb(struct iommu *iommu, u16 did, > > u64 addr, unsigned int size_order, u64 > > type); > > > > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, > > - u32 max_invs_pend, > > - u16 sid, u16 size, u64 addr); > > + struct pci_ats_dev *ats_dev, > > + u16 did, u16 size, u64 > > + addr); > > > > unsigned int get_cache_line_size(void); void cacheline_flush(char > > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c > > b/xen/drivers/passthrough/vtd/qinval.c > > index 4492b29..e4e2771 100644 > > --- a/xen/drivers/passthrough/vtd/qinval.c > > +++ b/xen/drivers/passthrough/vtd/qinval.c > > @@ -27,11 +27,11 @@ > > #include "dmar.h" > > #include "vtd.h" > > #include "extern.h" > > +#include "../ats.h" > > Earlier you said: > >1. a forward declaration struct pci_ats_dev*, instead of > > including ats.h. > This context is 'in extern.h', but.. > But above you still have ats.h included. > .. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is used in this file. > > > > #define VTD_QI_TIMEOUT 1 > > > > -static int __must_check invalidate_sync(struct iommu *iommu, > > -bool_t flush_dev_iotlb); > > +static int __must_check invalidate_sync(struct iommu *iommu); > > I don't understand the rationale behind. In earlier patch you introduce a new > parameter which is however just removed later here > In earlier patch, refactor invalidate_sync() to indicate whether we need to flush device IOTLB or not. change it back here, as I add a specific function - dev_invalidate_sync() for device IOTLB invalidation.. Quan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
>>> On 24.06.16 at 13:55, wrote: >> --- a/xen/drivers/passthrough/pci.c >> +++ b/xen/drivers/passthrough/pci.c >> @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct >> pci_dev *pdev) >> xfree(pdev); >> } >> >> -static void _pci_hide_device(struct pci_dev *pdev) >> +void pci_hide_existing_device(struct pci_dev *pdev) > > Don't understand what is the meaning of "hiding existing device". > Is there case where you may want to hide a non-existing device? The distinction is whether alloc_pdev() needs calling, as can be seen ... >> @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) >> pdev = alloc_pdev(get_pseg(0), bus, devfn); >> if ( pdev ) >> { >> -_pci_hide_device(pdev); >> +pci_hide_existing_device(pdev); >> rc = 0; >> } ... as the beginning context of this hunk. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
> From: Xu, Quan > Sent: Friday, June 24, 2016 1:52 PM > > From: Quan Xu > > If Device-TLB flush timed out, we hide the target ATS device > immediately and crash the domain owning this ATS device. If > impacted domain is hardware domain, just throw out a warning. > > By hiding the device, we make sure it can't be assigned to any > domain any longer (see device_assigned). > > Signed-off-by: Quan Xu > > CC: Jan Beulich > CC: Kevin Tian > CC: Feng Wu > > --- > v12: >1. a forward declaration struct pci_ats_dev*, instead of > including ats.h. >2. eliminate the loop. >3. use the same logic for logging and crashing as I did in > other series (despite I have moved the domain crash logic > up to the generic IOMMU layer, I think I am better still > leave it as is). >4. enhance dev_invalidate_sync() with ASSERT(). > --- > xen/drivers/passthrough/pci.c | 6 +-- > xen/drivers/passthrough/vtd/extern.h | 5 ++- > xen/drivers/passthrough/vtd/qinval.c | 75 > +-- > xen/drivers/passthrough/vtd/x86/ats.c | 9 + > xen/include/xen/pci.h | 1 + > 5 files changed, 71 insertions(+), 25 deletions(-) > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > index 98936f55c..843dc88 100644 > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct > pci_dev *pdev) > xfree(pdev); > } > > -static void _pci_hide_device(struct pci_dev *pdev) > +void pci_hide_existing_device(struct pci_dev *pdev) Don't understand what is the meaning of "hiding existing device". Is there case where you may want to hide a non-existing device? > { > if ( pdev->domain ) > return; > @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) > pdev = alloc_pdev(get_pseg(0), bus, devfn); > if ( pdev ) > { > -_pci_hide_device(pdev); > +pci_hide_existing_device(pdev); > rc = 0; > } > pcidevs_unlock(); > @@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn) > } > > __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map); > -_pci_hide_device(pdev); > +pci_hide_existing_device(pdev); > > return 0; > } > diff --git a/xen/drivers/passthrough/vtd/extern.h > b/xen/drivers/passthrough/vtd/extern.h > index 45357f2..efaff28 100644 > --- a/xen/drivers/passthrough/vtd/extern.h > +++ b/xen/drivers/passthrough/vtd/extern.h > @@ -25,6 +25,7 @@ > > #define VTDPREFIX "[VT-D]" > > +struct pci_ats_dev; > extern bool_t rwbf_quirk; > > void print_iommu_regs(struct acpi_drhd_unit *drhd); > @@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, > u64 addr, unsigned int size_order, u64 type); > > int __must_check qinval_device_iotlb_sync(struct iommu *iommu, > - u32 max_invs_pend, > - u16 sid, u16 size, u64 addr); > + struct pci_ats_dev *ats_dev, > + u16 did, u16 size, u64 addr); > > unsigned int get_cache_line_size(void); > void cacheline_flush(char *); > diff --git a/xen/drivers/passthrough/vtd/qinval.c > b/xen/drivers/passthrough/vtd/qinval.c > index 4492b29..e4e2771 100644 > --- a/xen/drivers/passthrough/vtd/qinval.c > +++ b/xen/drivers/passthrough/vtd/qinval.c > @@ -27,11 +27,11 @@ > #include "dmar.h" > #include "vtd.h" > #include "extern.h" > +#include "../ats.h" Earlier you said: >1. a forward declaration struct pci_ats_dev*, instead of > including ats.h. But above you still have ats.h included. > > #define VTD_QI_TIMEOUT 1 > > -static int __must_check invalidate_sync(struct iommu *iommu, > -bool_t flush_dev_iotlb); > +static int __must_check invalidate_sync(struct iommu *iommu); I don't understand the rationale behind. In earlier patch you introduce a new parameter which is however just removed later here Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue
From: Quan Xu If Device-TLB flush timed out, we hide the target ATS device immediately and crash the domain owning this ATS device. If impacted domain is hardware domain, just throw out a warning. By hiding the device, we make sure it can't be assigned to any domain any longer (see device_assigned). Signed-off-by: Quan Xu CC: Jan Beulich CC: Kevin Tian CC: Feng Wu --- v12: 1. a forward declaration struct pci_ats_dev*, instead of including ats.h. 2. eliminate the loop. 3. use the same logic for logging and crashing as I did in other series (despite I have moved the domain crash logic up to the generic IOMMU layer, I think I am better still leave it as is). 4. enhance dev_invalidate_sync() with ASSERT(). --- xen/drivers/passthrough/pci.c | 6 +-- xen/drivers/passthrough/vtd/extern.h | 5 ++- xen/drivers/passthrough/vtd/qinval.c | 75 +-- xen/drivers/passthrough/vtd/x86/ats.c | 9 + xen/include/xen/pci.h | 1 + 5 files changed, 71 insertions(+), 25 deletions(-) diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 98936f55c..843dc88 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) xfree(pdev); } -static void _pci_hide_device(struct pci_dev *pdev) +void pci_hide_existing_device(struct pci_dev *pdev) { if ( pdev->domain ) return; @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn) pdev = alloc_pdev(get_pseg(0), bus, devfn); if ( pdev ) { -_pci_hide_device(pdev); +pci_hide_existing_device(pdev); rc = 0; } pcidevs_unlock(); @@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn) } __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map); -_pci_hide_device(pdev); +pci_hide_existing_device(pdev); return 0; } diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h index 45357f2..efaff28 100644 --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -25,6 +25,7 @@ #define VTDPREFIX "[VT-D]" +struct pci_ats_dev; extern bool_t rwbf_quirk; void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type); int __must_check qinval_device_iotlb_sync(struct iommu *iommu, - u32 max_invs_pend, - u16 sid, u16 size, u64 addr); + struct pci_ats_dev *ats_dev, + u16 did, u16 size, u64 addr); unsigned int get_cache_line_size(void); void cacheline_flush(char *); diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c index 4492b29..e4e2771 100644 --- a/xen/drivers/passthrough/vtd/qinval.c +++ b/xen/drivers/passthrough/vtd/qinval.c @@ -27,11 +27,11 @@ #include "dmar.h" #include "vtd.h" #include "extern.h" +#include "../ats.h" #define VTD_QI_TIMEOUT 1 -static int __must_check invalidate_sync(struct iommu *iommu, -bool_t flush_dev_iotlb); +static int __must_check invalidate_sync(struct iommu *iommu); static void print_qi_regs(struct iommu *iommu) { @@ -103,7 +103,7 @@ static int __must_check queue_invalidate_context_sync(struct iommu *iommu, unmap_vtd_domain_page(qinval_entries); -return invalidate_sync(iommu, 0); +return invalidate_sync(iommu); } static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu, @@ -140,7 +140,7 @@ static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu, qinval_update_qtail(iommu, index); spin_unlock_irqrestore(&iommu->register_lock, flags); -return invalidate_sync(iommu, 0); +return invalidate_sync(iommu); } static int __must_check queue_invalidate_wait(struct iommu *iommu, @@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct iommu *iommu, return -EOPNOTSUPP; } -static int __must_check invalidate_sync(struct iommu *iommu, -bool_t flush_dev_iotlb) +static int __must_check invalidate_sync(struct iommu *iommu) { struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu); ASSERT(qi_ctrl->qinval_maddr); -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb); +return queue_invalidate_wait(iommu, 0, 1, 1, 0); +} + +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did, + struct pci_dev *pdev) +{ +struct domain *d = NULL; + +if ( test_bit(did, iommu->domid_bitmap) ) +d = rcu_lock_domain_by_id(iommu->domid_map[did]); + +/* + * In case the domain h