Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-17 Thread Jan Beulich
>>> On 15.04.15 at 19:41,  wrote:
> On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
>> >>> On 10.04.15 at 22:02,  wrote:
>> > On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
>> >> As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
>> >> better way") and its broken predecessor, make sure we don't access the
>> >> MSI-X table without having enabled MSI-X first, using the mask-all flag
>> >> instead to prevent interrupts from occurring.
>> > 
>> > This causes an regression with an Linux guest that has the XSA120 + XSA120
>> > addendum with PV guests (hadn't tried yet HVM).
>> 
>> You mentioning XSA-120 and its addendum - are these requirements
>> for the problem to be seen? I admit I may have tested a PV guest
>> only with an SR-IOV VF (and only a HVM guest also with an "ordinary"
>> device), but I'd like to be clear about the validity of the connection.
> 
> No. I just tried with v4.0-rc5 (and then also v4.0) and just 
> using SR-IOV to make this simpler.
> 
> With staging  + two of your patches:
> a10cc68 TODO: drop //temp-s
> 1b8721c x86/MSI-X: be more careful during teardown
> 
> When trying to enable SR-IOV I get this error:
> 
> failed to echo 1 > 
> /sys/devices/pci:00/:00:01.0/:0a:00.0/sriov_numvfs, rc: 1
> (hadn't tried just passing in an HVM guest).
> 
> Attached is the 'xl dmesg'.

Could you replace the patch I handed you earlier on by this one
and try again? I actually was able to determine that I did try a
(SUSE) PV guest without seeing an issue. I just now tried again,
and I don't see either of the two debug warnings. So quite clear
any indication towards a pvops problem.

Jan

x86/MSI-X: access MSI-X table only after having enabled MSI-X

As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
better way") and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.

Signed-off-by: Jan Beulich 
---
v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled

--- unstable.orig/xen/arch/x86/msi.c
+++ unstable/xen/arch/x86/msi.c
@@ -142,6 +142,21 @@ static bool_t memory_decoded(const struc
   PCI_COMMAND_MEMORY);
 }
 
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn),
+  PCI_FUNC(dev->devfn), msix_control_reg(pos));
+
+if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+{//temp
+ static bool_t warned;
+ WARN_ON(!test_and_set_bool(warned));
+return 0;
+}
+
+return memory_decoded(dev);
+}
+
 /*
  * MSI message composition
  */
@@ -219,7 +234,8 @@ static bool_t read_msi_msg(struct msi_de
 void __iomem *base;
 base = entry->mask_base;
 
-if ( unlikely(!memory_decoded(entry->dev)) )
+if ( unlikely(!msix_memory_decoded(entry->dev,
+   entry->msi_attrib.pos)) )
 return 0;
 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -285,7 +301,8 @@ static int write_msi_msg(struct msi_desc
 void __iomem *base;
 base = entry->mask_base;
 
-if ( unlikely(!memory_decoded(entry->dev)) )
+if ( unlikely(!msix_memory_decoded(entry->dev,
+   entry->msi_attrib.pos)) )
 return -ENXIO;
 writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -379,7 +396,7 @@ static bool_t msi_set_mask_bit(struct ir
 {
 struct msi_desc *entry = desc->msi_desc;
 struct pci_dev *pdev;
-u16 seg;
+u16 seg, control;
 u8 bus, slot, func;
 
 ASSERT(spin_is_locked(&desc->lock));
@@ -401,35 +418,38 @@ static bool_t msi_set_mask_bit(struct ir
 }
 break;
 case PCI_CAP_ID_MSIX:
+control = pci_conf_read16(seg, bus, slot, func,
+  msix_control_reg(entry->msi_attrib.pos));
+if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+PCI_MSIX_FLAGS_MASKALL));
 if ( likely(memory_decoded(pdev)) )
 {
 writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-break;
+if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
+break;
+flag = 1;
 }
-if ( flag )
+else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
 {
-u16 control;
 domid_t domid = pdev->domain->domain_id;
 
-control = pci

Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-17 Thread Jan Beulich
>>> On 16.04.15 at 20:21,  wrote:
> What kernel are you using? Or better yet - what branch/tree could
> one find it at?

As always, I'm personally using my own kernel, not in any branch/tree.
But for all purposes here, our "Kernel-of-the-Day" should be good
enough, i.e. the stuff under
http://download.opensuse.org/repositories/Kernel:/HEAD/standard/.

But anyway I hope to get you a replacement patch later today for
the one I had previously handed you with some debugging left in.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-16 Thread Konrad Rzeszutek Wilk
On Thu, Apr 16, 2015 at 08:43:30AM +0100, Jan Beulich wrote:
> >>> On 15.04.15 at 19:41,  wrote:
> > On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
> >> You mentioning XSA-120 and its addendum - are these requirements
> >> for the problem to be seen? I admit I may have tested a PV guest
> >> only with an SR-IOV VF (and only a HVM guest also with an "ordinary"
> >> device), but I'd like to be clear about the validity of the connection.
> > 
> > No. I just tried with v4.0-rc5 (and then also v4.0) and just 
> > using SR-IOV to make this simpler.
> 
> Good.
> 
> > With staging  + two of your patches:
> > a10cc68 TODO: drop //temp-s
> > 1b8721c x86/MSI-X: be more careful during teardown
> > 
> > When trying to enable SR-IOV I get this error:
> 
> Okay, this definitely works for me, albeit I know I had to do
> adjustments to avoid running into the (debug) warning you've
> hit. But (looking at the call stack) it surely would be a mistake to
> set up an MSI-X IRQ on the device without first enabling MSI-X
> on the it (i.e. the error returned could be considered legitimate).
> While we may want the hypervisor to cope with this (by enabling
> MSI-X on this path, which I'd have to add to that patch), is this
> hypervisor change perhaps uncovering a pv-ops kernel issue (in
> that other than what drivers/pci/msi.c does as of the commit
> mentioned in the description of that second patch some Xen-
> specific path fails to enable MSI-X before setting up any of the
> entries)?

Everything is possible :-)

What kernel are you using? Or better yet - what branch/tree could
one find it at?
> 
> Jan
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-16 Thread Jan Beulich
>>> On 15.04.15 at 19:41,  wrote:
> On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
>> You mentioning XSA-120 and its addendum - are these requirements
>> for the problem to be seen? I admit I may have tested a PV guest
>> only with an SR-IOV VF (and only a HVM guest also with an "ordinary"
>> device), but I'd like to be clear about the validity of the connection.
> 
> No. I just tried with v4.0-rc5 (and then also v4.0) and just 
> using SR-IOV to make this simpler.

Good.

> With staging  + two of your patches:
> a10cc68 TODO: drop //temp-s
> 1b8721c x86/MSI-X: be more careful during teardown
> 
> When trying to enable SR-IOV I get this error:

Okay, this definitely works for me, albeit I know I had to do
adjustments to avoid running into the (debug) warning you've
hit. But (looking at the call stack) it surely would be a mistake to
set up an MSI-X IRQ on the device without first enabling MSI-X
on the it (i.e. the error returned could be considered legitimate).
While we may want the hypervisor to cope with this (by enabling
MSI-X on this path, which I'd have to add to that patch), is this
hypervisor change perhaps uncovering a pv-ops kernel issue (in
that other than what drivers/pci/msi.c does as of the commit
mentioned in the description of that second patch some Xen-
specific path fails to enable MSI-X before setting up any of the
entries)?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-15 Thread Konrad Rzeszutek Wilk
On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
> >>> On 10.04.15 at 22:02,  wrote:
> > On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
> >> As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
> >> better way") and its broken predecessor, make sure we don't access the
> >> MSI-X table without having enabled MSI-X first, using the mask-all flag
> >> instead to prevent interrupts from occurring.
> > 
> > This causes an regression with an Linux guest that has the XSA120 + XSA120
> > addendum with PV guests (hadn't tried yet HVM).
> 
> You mentioning XSA-120 and its addendum - are these requirements
> for the problem to be seen? I admit I may have tested a PV guest
> only with an SR-IOV VF (and only a HVM guest also with an "ordinary"
> device), but I'd like to be clear about the validity of the connection.

No. I just tried with v4.0-rc5 (and then also v4.0) and just 
using SR-IOV to make this simpler.

With staging  + two of your patches:
a10cc68 TODO: drop //temp-s
1b8721c x86/MSI-X: be more careful during teardown

When trying to enable SR-IOV I get this error:

failed to echo 1 > 
/sys/devices/pci:00/:00:01.0/:0a:00.0/sriov_numvfs, rc: 1
(hadn't tried just passing in an HVM guest).

Attached is the 'xl dmesg'.

Please keep in mind that if I do this based on

70a3cbb x86/vMSI-X: honor all mask requests

or 
df9f567 x86/vMSI-X: add valid bits for read acceleration

It works and I can setup VFs (and do PCI operations).


Here is the 'xl dmesg' output.

 Xen 4.6-unstable
(XEN) Xen version 4.6-unstable (konrad@(none)) (gcc (GCC) 4.4.4 20100503 (Red 
Hat 4.4.4-2)) debug=y Wed Apr 15 11:44:44 EDT 2015
(XEN) Latest ChangeSet: Wed Apr 15 11:43:31 2015 -0400 git:a10cc68
(XEN) Bootloader: unknown
(XEN) Command line: com1=115200,8n1 dom0_mem=999M,max:1232M dom0_max_vcpus=3 
cpufreq=performance,verbose iommu=verbose,no-intremap console=com1,vga 
loglvl=all guest_loglvl=all
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN)  VBE/DDC methods: none; EDID transfer time: 2 seconds
(XEN)  EDID info not retrieved because no DDC retrieval method detected
(XEN) Disc information:
(XEN)  Found 2 MBR signatures
(XEN)  Found 2 EDD information structures
(XEN) Xen-e820 RAM map:
(XEN)   - 0009e800 (usable)
(XEN)  0009e800 - 000a (reserved)
(XEN)  000e6000 - 0010 (reserved)
(XEN)  0010 - bf77 (usable)
(XEN)  bf77 - bf77e000 (ACPI data)
(XEN)  bf77e000 - bf7d (ACPI NVS)
(XEN)  bf7d - bf7e (reserved)
(XEN)  bf7ec000 - c000 (reserved)
(XEN)  e000 - f000 (reserved)
(XEN)  fee0 - fee01000 (reserved)
(XEN)  ffc0 - 0001 (reserved)
(XEN)  0001 - 00034000 (usable)
(XEN) ACPI: RSDP 000FA180, 0024 (r2 ACPIAM)
(XEN) ACPI: XSDT BF770100, 006C (r1 SMCI20111028 MSFT   97)
(XEN) ACPI: FACP BF770290, 00F4 (r4 102811 FACP1450 20111028 MSFT   97)
(XEN) ACPI: DSDT BF7706C0, 5CBB (r2  1F280 1F280 INTL 20051117)
(XEN) ACPI: FACS BF77E000, 0040
(XEN) ACPI: APIC BF770390, 0136 (r2 102811 APIC1450 20111028 MSFT   97)
(XEN) ACPI: MCFG BF7704D0, 003C (r1 102811 OEMMCFG  20111028 MSFT   97)
(XEN) ACPI: SLIT BF770510, 0030 (r1 102811 OEMSLIT  20111028 MSFT   97)
(XEN) ACPI: OEMB BF77E040, 0092 (r1 102811 OEMB1450 20111028 MSFT   97)
(XEN) ACPI: SRAT BF77A6C0, 01A8 (r2 102811 OEMSRAT 1 INTL1)
(XEN) ACPI: HPET BF77A870, 0038 (r1 102811 OEMHPET  20111028 MSFT   97)
(XEN) ACPI: DMAR BF77E0E0, 0120 (r1AMI  OEMDMAR1 MSFT   97)
(XEN) ACPI: SSDT BF781720, 0363 (r1 DpgPmmCpuPm   12 INTL 20051117)
(XEN) System RAM: 12279MB (12573752kB)
(XEN) SRAT: PXM 0 -> APIC 0 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 2 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 4 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 6 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 3 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 5 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 7 -> Node 0
(XEN) SRAT: PXM 1 -> APIC 16 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 18 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 20 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 22 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 17 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 19 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 21 -> Node 1
(XEN) SRAT: PXM 1 -> APIC 23 -> Node 1
(XEN) SRAT: Node 1 PXM 1 0-a
(XEN) SRAT: Node 1 PXM 1 10-c000
(XEN) SRAT: Node 1 PXM 1 1-34000
(XEN) NUMA: Allocated memnodemap from 338f4f000 - 338f53000
(XEN) NUMA: Using 8 for the hash shift.
(XEN) SRAT: Node 0 has no memory. BIOS Bug or mis-configured hardware?
(XEN) Domain heap initialised DMA width 9 bits
(XEN) found SMP MP-table at 000ff780
(XEN) DMI present.
(XEN) Using APIC driver default
(XEN) ACPI: PM-Timer IO Port: 0x808
(XEN) ACPI: SLEEP INFO: pm1x_cnt[1:804,1:0], pm1x_evt[1:800,1

Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-13 Thread Jan Beulich
>>> On 10.04.15 at 22:02,  wrote:
> On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
>> As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
>> better way") and its broken predecessor, make sure we don't access the
>> MSI-X table without having enabled MSI-X first, using the mask-all flag
>> instead to prevent interrupts from occurring.
> 
> This causes an regression with an Linux guest that has the XSA120 + XSA120
> addendum with PV guests (hadn't tried yet HVM).

You mentioning XSA-120 and its addendum - are these requirements
for the problem to be seen? I admit I may have tested a PV guest
only with an SR-IOV VF (and only a HVM guest also with an "ordinary"
device), but I'd like to be clear about the validity of the connection.

> When PV guest requests an MSI-X, pciback gets:
> 
> [  122.778654] xen-pciback: :0a:00.0: enable MSI-X
> [  122.778861] pciback :0a:00.0: xen map irq failed -6 for 1 domain
> [  122.778887] xen_pciback: :0a:00.0: error enabling MSI-X for guest 1: 
> err -6!

Yeah, there were a few adjustments necessary to get similar issues
under control for HVM guests - maybe there's a path left not covered
by what's done for HVM guests.

> The device has the PCI_COMMAND enabled correctly:
> 
> # lspci -s 0a:0.0 -vvv | head
> 0a:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network 
> Connection (rev 01)
> Subsystem: Super Micro Computer Inc Device 10c9
> Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
> Stepping- SERR- FastB2B- DisINTx-
> 
> Attaching the 'xl dmesg', 'dmesg', and 'lspci'

Right - MSI-X is still disabled if the lspci really matches the state at
the time the failure occurred. I'm attaching the original patch with
some debugging code left in, which I suppose would show where
the problematic path is in case you can get to it before I would.

Jan


TODO: drop //temp-s

As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a
better way") and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.

--- unstable.orig/xen/arch/x86/msi.c2015-02-10 08:19:17.0 +0100
+++ unstable/xen/arch/x86/msi.c 2015-02-10 09:05:12.0 +0100
@@ -142,6 +142,23 @@ static bool_t memory_decoded(const struc
   PCI_COMMAND_MEMORY);
 }
 
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+u16 control = pci_conf_read16(dev->seg, dev->bus,
+  PCI_SLOT(dev->devfn),
+  PCI_FUNC(dev->devfn),
+  msix_control_reg(pos));
+
+if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+{//temp
+ static bool_t warned;
+ WARN_ON(!test_and_set_bool(warned));
+return 0;
+}
+
+return memory_decoded(dev);
+}
+
 /*
  * MSI message composition
  */
@@ -219,7 +236,8 @@ static bool_t read_msi_msg(struct msi_de
 void __iomem *base;
 base = entry->mask_base;
 
-if ( unlikely(!memory_decoded(entry->dev)) )
+if ( unlikely(!msix_memory_decoded(entry->dev,
+   entry->msi_attrib.pos)) )
 return 0;
 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -285,7 +303,8 @@ static int write_msi_msg(struct msi_desc
 void __iomem *base;
 base = entry->mask_base;
 
-if ( unlikely(!memory_decoded(entry->dev)) )
+if ( unlikely(!msix_memory_decoded(entry->dev,
+   entry->msi_attrib.pos)) )
 return -ENXIO;
 writel(msg->address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -379,7 +398,7 @@ static bool_t msi_set_mask_bit(struct ir
 {
 struct msi_desc *entry = desc->msi_desc;
 struct pci_dev *pdev;
-u16 seg;
+u16 seg, control;
 u8 bus, slot, func;
 
 ASSERT(spin_is_locked(&desc->lock));
@@ -401,35 +420,38 @@ static bool_t msi_set_mask_bit(struct ir
 }
 break;
 case PCI_CAP_ID_MSIX:
+control = pci_conf_read16(seg, bus, slot, func,
+  msix_control_reg(entry->msi_attrib.pos));
+if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
+pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry->msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+PCI_MSIX_FLAGS_MASKALL));
 if ( likely(memory_decoded(pdev)) )
 {
 writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-break;
+if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
+break;
+flag