Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-17 Thread Jan Beulich
 On 16.04.15 at 20:21, konrad.w...@oracle.com wrote:
 What kernel are you using? Or better yet - what branch/tree could
 one find it at?

As always, I'm personally using my own kernel, not in any branch/tree.
But for all purposes here, our Kernel-of-the-Day should be good
enough, i.e. the stuff under
http://download.opensuse.org/repositories/Kernel:/HEAD/standard/.

But anyway I hope to get you a replacement patch later today for
the one I had previously handed you with some debugging left in.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-17 Thread Jan Beulich
 On 15.04.15 at 19:41, konrad.w...@oracle.com wrote:
 On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
  On 10.04.15 at 22:02, konrad.w...@oracle.com wrote:
  On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
  As done in Linux by f598282f51 (PCI: Fix the NIU MSI-X problem in a
  better way) and its broken predecessor, make sure we don't access the
  MSI-X table without having enabled MSI-X first, using the mask-all flag
  instead to prevent interrupts from occurring.
  
  This causes an regression with an Linux guest that has the XSA120 + XSA120
  addendum with PV guests (hadn't tried yet HVM).
 
 You mentioning XSA-120 and its addendum - are these requirements
 for the problem to be seen? I admit I may have tested a PV guest
 only with an SR-IOV VF (and only a HVM guest also with an ordinary
 device), but I'd like to be clear about the validity of the connection.
 
 No. I just tried with v4.0-rc5 (and then also v4.0) and just 
 using SR-IOV to make this simpler.
 
 With staging  + two of your patches:
 a10cc68 TODO: drop //temp-s
 1b8721c x86/MSI-X: be more careful during teardown
 
 When trying to enable SR-IOV I get this error:
 
 failed to echo 1  
 /sys/devices/pci:00/:00:01.0/:0a:00.0/sriov_numvfs, rc: 1
 (hadn't tried just passing in an HVM guest).
 
 Attached is the 'xl dmesg'.

Could you replace the patch I handed you earlier on by this one
and try again? I actually was able to determine that I did try a
(SUSE) PV guest without seeing an issue. I just now tried again,
and I don't see either of the two debug warnings. So quite clear
any indication towards a pvops problem.

Jan

x86/MSI-X: access MSI-X table only after having enabled MSI-X

As done in Linux by f598282f51 (PCI: Fix the NIU MSI-X problem in a
better way) and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.

Signed-off-by: Jan Beulich jbeul...@suse.com
---
v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled

--- unstable.orig/xen/arch/x86/msi.c
+++ unstable/xen/arch/x86/msi.c
@@ -142,6 +142,21 @@ static bool_t memory_decoded(const struc
   PCI_COMMAND_MEMORY);
 }
 
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+u16 control = pci_conf_read16(dev-seg, dev-bus, PCI_SLOT(dev-devfn),
+  PCI_FUNC(dev-devfn), msix_control_reg(pos));
+
+if ( !(control  PCI_MSIX_FLAGS_ENABLE) )
+{//temp
+ static bool_t warned;
+ WARN_ON(!test_and_set_bool(warned));
+return 0;
+}
+
+return memory_decoded(dev);
+}
+
 /*
  * MSI message composition
  */
@@ -219,7 +234,8 @@ static bool_t read_msi_msg(struct msi_de
 void __iomem *base;
 base = entry-mask_base;
 
-if ( unlikely(!memory_decoded(entry-dev)) )
+if ( unlikely(!msix_memory_decoded(entry-dev,
+   entry-msi_attrib.pos)) )
 return 0;
 msg-address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
 msg-address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -285,7 +301,8 @@ static int write_msi_msg(struct msi_desc
 void __iomem *base;
 base = entry-mask_base;
 
-if ( unlikely(!memory_decoded(entry-dev)) )
+if ( unlikely(!msix_memory_decoded(entry-dev,
+   entry-msi_attrib.pos)) )
 return -ENXIO;
 writel(msg-address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -379,7 +396,7 @@ static bool_t msi_set_mask_bit(struct ir
 {
 struct msi_desc *entry = desc-msi_desc;
 struct pci_dev *pdev;
-u16 seg;
+u16 seg, control;
 u8 bus, slot, func;
 
 ASSERT(spin_is_locked(desc-lock));
@@ -401,35 +418,38 @@ static bool_t msi_set_mask_bit(struct ir
 }
 break;
 case PCI_CAP_ID_MSIX:
+control = pci_conf_read16(seg, bus, slot, func,
+  msix_control_reg(entry-msi_attrib.pos));
+if ( unlikely(!(control  PCI_MSIX_FLAGS_ENABLE)) )
+pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry-msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+PCI_MSIX_FLAGS_MASKALL));
 if ( likely(memory_decoded(pdev)) )
 {
 writel(flag, entry-mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 readl(entry-mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-break;
+if ( likely(control  PCI_MSIX_FLAGS_ENABLE) )
+break;
+flag = 1;
 }
-if ( flag )
+else if ( flag  !(control  PCI_MSIX_FLAGS_MASKALL) )
 {
-u16 control;
 domid_t domid = pdev-domain-domain_id;
 
-control = pci_conf_read16(seg, bus, slot, func,
-  

Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-16 Thread Jan Beulich
 On 15.04.15 at 19:41, konrad.w...@oracle.com wrote:
 On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
 You mentioning XSA-120 and its addendum - are these requirements
 for the problem to be seen? I admit I may have tested a PV guest
 only with an SR-IOV VF (and only a HVM guest also with an ordinary
 device), but I'd like to be clear about the validity of the connection.
 
 No. I just tried with v4.0-rc5 (and then also v4.0) and just 
 using SR-IOV to make this simpler.

Good.

 With staging  + two of your patches:
 a10cc68 TODO: drop //temp-s
 1b8721c x86/MSI-X: be more careful during teardown
 
 When trying to enable SR-IOV I get this error:

Okay, this definitely works for me, albeit I know I had to do
adjustments to avoid running into the (debug) warning you've
hit. But (looking at the call stack) it surely would be a mistake to
set up an MSI-X IRQ on the device without first enabling MSI-X
on the it (i.e. the error returned could be considered legitimate).
While we may want the hypervisor to cope with this (by enabling
MSI-X on this path, which I'd have to add to that patch), is this
hypervisor change perhaps uncovering a pv-ops kernel issue (in
that other than what drivers/pci/msi.c does as of the commit
mentioned in the description of that second patch some Xen-
specific path fails to enable MSI-X before setting up any of the
entries)?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-16 Thread Konrad Rzeszutek Wilk
On Thu, Apr 16, 2015 at 08:43:30AM +0100, Jan Beulich wrote:
  On 15.04.15 at 19:41, konrad.w...@oracle.com wrote:
  On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
  You mentioning XSA-120 and its addendum - are these requirements
  for the problem to be seen? I admit I may have tested a PV guest
  only with an SR-IOV VF (and only a HVM guest also with an ordinary
  device), but I'd like to be clear about the validity of the connection.
  
  No. I just tried with v4.0-rc5 (and then also v4.0) and just 
  using SR-IOV to make this simpler.
 
 Good.
 
  With staging  + two of your patches:
  a10cc68 TODO: drop //temp-s
  1b8721c x86/MSI-X: be more careful during teardown
  
  When trying to enable SR-IOV I get this error:
 
 Okay, this definitely works for me, albeit I know I had to do
 adjustments to avoid running into the (debug) warning you've
 hit. But (looking at the call stack) it surely would be a mistake to
 set up an MSI-X IRQ on the device without first enabling MSI-X
 on the it (i.e. the error returned could be considered legitimate).
 While we may want the hypervisor to cope with this (by enabling
 MSI-X on this path, which I'd have to add to that patch), is this
 hypervisor change perhaps uncovering a pv-ops kernel issue (in
 that other than what drivers/pci/msi.c does as of the commit
 mentioned in the description of that second patch some Xen-
 specific path fails to enable MSI-X before setting up any of the
 entries)?

Everything is possible :-)

What kernel are you using? Or better yet - what branch/tree could
one find it at?
 
 Jan
 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-15 Thread Konrad Rzeszutek Wilk
On Mon, Apr 13, 2015 at 10:05:14AM +0100, Jan Beulich wrote:
  On 10.04.15 at 22:02, konrad.w...@oracle.com wrote:
  On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
  As done in Linux by f598282f51 (PCI: Fix the NIU MSI-X problem in a
  better way) and its broken predecessor, make sure we don't access the
  MSI-X table without having enabled MSI-X first, using the mask-all flag
  instead to prevent interrupts from occurring.
  
  This causes an regression with an Linux guest that has the XSA120 + XSA120
  addendum with PV guests (hadn't tried yet HVM).
 
 You mentioning XSA-120 and its addendum - are these requirements
 for the problem to be seen? I admit I may have tested a PV guest
 only with an SR-IOV VF (and only a HVM guest also with an ordinary
 device), but I'd like to be clear about the validity of the connection.

No. I just tried with v4.0-rc5 (and then also v4.0) and just 
using SR-IOV to make this simpler.

With staging  + two of your patches:
a10cc68 TODO: drop //temp-s
1b8721c x86/MSI-X: be more careful during teardown

When trying to enable SR-IOV I get this error:

failed to echo 1  
/sys/devices/pci:00/:00:01.0/:0a:00.0/sriov_numvfs, rc: 1
(hadn't tried just passing in an HVM guest).

Attached is the 'xl dmesg'.

Please keep in mind that if I do this based on

70a3cbb x86/vMSI-X: honor all mask requests

or 
df9f567 x86/vMSI-X: add valid bits for read acceleration

It works and I can setup VFs (and do PCI operations).


Here is the 'xl dmesg' output.

 Xen 4.6-unstable
(XEN) Xen version 4.6-unstable (konrad@(none)) (gcc (GCC) 4.4.4 20100503 (Red 
Hat 4.4.4-2)) debug=y Wed Apr 15 11:44:44 EDT 2015
(XEN) Latest ChangeSet: Wed Apr 15 11:43:31 2015 -0400 git:a10cc68
(XEN) Bootloader: unknown
(XEN) Command line: com1=115200,8n1 dom0_mem=999M,max:1232M dom0_max_vcpus=3 
cpufreq=performance,verbose iommu=verbose,no-intremap console=com1,vga 
loglvl=all guest_loglvl=all
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN)  VBE/DDC methods: none; EDID transfer time: 2 seconds
(XEN)  EDID info not retrieved because no DDC retrieval method detected
(XEN) Disc information:
(XEN)  Found 2 MBR signatures
(XEN)  Found 2 EDD information structures
(XEN) Xen-e820 RAM map:
(XEN)   - 0009e800 (usable)
(XEN)  0009e800 - 000a (reserved)
(XEN)  000e6000 - 0010 (reserved)
(XEN)  0010 - bf77 (usable)
(XEN)  bf77 - bf77e000 (ACPI data)
(XEN)  bf77e000 - bf7d (ACPI NVS)
(XEN)  bf7d - bf7e (reserved)
(XEN)  bf7ec000 - c000 (reserved)
(XEN)  e000 - f000 (reserved)
(XEN)  fee0 - fee01000 (reserved)
(XEN)  ffc0 - 0001 (reserved)
(XEN)  0001 - 00034000 (usable)
(XEN) ACPI: RSDP 000FA180, 0024 (r2 ACPIAM)
(XEN) ACPI: XSDT BF770100, 006C (r1 SMCI20111028 MSFT   97)
(XEN) ACPI: FACP BF770290, 00F4 (r4 102811 FACP1450 20111028 MSFT   97)
(XEN) ACPI: DSDT BF7706C0, 5CBB (r2  1F280 1F280 INTL 20051117)
(XEN) ACPI: FACS BF77E000, 0040
(XEN) ACPI: APIC BF770390, 0136 (r2 102811 APIC1450 20111028 MSFT   97)
(XEN) ACPI: MCFG BF7704D0, 003C (r1 102811 OEMMCFG  20111028 MSFT   97)
(XEN) ACPI: SLIT BF770510, 0030 (r1 102811 OEMSLIT  20111028 MSFT   97)
(XEN) ACPI: OEMB BF77E040, 0092 (r1 102811 OEMB1450 20111028 MSFT   97)
(XEN) ACPI: SRAT BF77A6C0, 01A8 (r2 102811 OEMSRAT 1 INTL1)
(XEN) ACPI: HPET BF77A870, 0038 (r1 102811 OEMHPET  20111028 MSFT   97)
(XEN) ACPI: DMAR BF77E0E0, 0120 (r1AMI  OEMDMAR1 MSFT   97)
(XEN) ACPI: SSDT BF781720, 0363 (r1 DpgPmmCpuPm   12 INTL 20051117)
(XEN) System RAM: 12279MB (12573752kB)
(XEN) SRAT: PXM 0 - APIC 0 - Node 0
(XEN) SRAT: PXM 0 - APIC 2 - Node 0
(XEN) SRAT: PXM 0 - APIC 4 - Node 0
(XEN) SRAT: PXM 0 - APIC 6 - Node 0
(XEN) SRAT: PXM 0 - APIC 1 - Node 0
(XEN) SRAT: PXM 0 - APIC 3 - Node 0
(XEN) SRAT: PXM 0 - APIC 5 - Node 0
(XEN) SRAT: PXM 0 - APIC 7 - Node 0
(XEN) SRAT: PXM 1 - APIC 16 - Node 1
(XEN) SRAT: PXM 1 - APIC 18 - Node 1
(XEN) SRAT: PXM 1 - APIC 20 - Node 1
(XEN) SRAT: PXM 1 - APIC 22 - Node 1
(XEN) SRAT: PXM 1 - APIC 17 - Node 1
(XEN) SRAT: PXM 1 - APIC 19 - Node 1
(XEN) SRAT: PXM 1 - APIC 21 - Node 1
(XEN) SRAT: PXM 1 - APIC 23 - Node 1
(XEN) SRAT: Node 1 PXM 1 0-a
(XEN) SRAT: Node 1 PXM 1 10-c000
(XEN) SRAT: Node 1 PXM 1 1-34000
(XEN) NUMA: Allocated memnodemap from 338f4f000 - 338f53000
(XEN) NUMA: Using 8 for the hash shift.
(XEN) SRAT: Node 0 has no memory. BIOS Bug or mis-configured hardware?
(XEN) Domain heap initialised DMA width 9 bits
(XEN) found SMP MP-table at 000ff780
(XEN) DMI present.
(XEN) Using APIC driver default
(XEN) ACPI: PM-Timer IO Port: 0x808
(XEN) ACPI: SLEEP INFO: pm1x_cnt[1:804,1:0], pm1x_evt[1:800,1:0]
(XEN) ACPI: 

Re: [Xen-devel] [PATCH v2 2/4] x86/MSI-X: access MSI-X table only after having enabled MSI-X

2015-04-13 Thread Jan Beulich
 On 10.04.15 at 22:02, konrad.w...@oracle.com wrote:
 On Wed, Mar 25, 2015 at 04:39:49PM +, Jan Beulich wrote:
 As done in Linux by f598282f51 (PCI: Fix the NIU MSI-X problem in a
 better way) and its broken predecessor, make sure we don't access the
 MSI-X table without having enabled MSI-X first, using the mask-all flag
 instead to prevent interrupts from occurring.
 
 This causes an regression with an Linux guest that has the XSA120 + XSA120
 addendum with PV guests (hadn't tried yet HVM).

You mentioning XSA-120 and its addendum - are these requirements
for the problem to be seen? I admit I may have tested a PV guest
only with an SR-IOV VF (and only a HVM guest also with an ordinary
device), but I'd like to be clear about the validity of the connection.

 When PV guest requests an MSI-X, pciback gets:
 
 [  122.778654] xen-pciback: :0a:00.0: enable MSI-X
 [  122.778861] pciback :0a:00.0: xen map irq failed -6 for 1 domain
 [  122.778887] xen_pciback: :0a:00.0: error enabling MSI-X for guest 1: 
 err -6!

Yeah, there were a few adjustments necessary to get similar issues
under control for HVM guests - maybe there's a path left not covered
by what's done for HVM guests.

 The device has the PCI_COMMAND enabled correctly:
 
 # lspci -s 0a:0.0 -vvv | head
 0a:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network 
 Connection (rev 01)
 Subsystem: Super Micro Computer Inc Device 10c9
 Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
 Stepping- SERR- FastB2B- DisINTx-
 
 Attaching the 'xl dmesg', 'dmesg', and 'lspci'

Right - MSI-X is still disabled if the lspci really matches the state at
the time the failure occurred. I'm attaching the original patch with
some debugging code left in, which I suppose would show where
the problematic path is in case you can get to it before I would.

Jan


TODO: drop //temp-s

As done in Linux by f598282f51 (PCI: Fix the NIU MSI-X problem in a
better way) and its broken predecessor, make sure we don't access the
MSI-X table without having enabled MSI-X first, using the mask-all flag
instead to prevent interrupts from occurring.

--- unstable.orig/xen/arch/x86/msi.c2015-02-10 08:19:17.0 +0100
+++ unstable/xen/arch/x86/msi.c 2015-02-10 09:05:12.0 +0100
@@ -142,6 +142,23 @@ static bool_t memory_decoded(const struc
   PCI_COMMAND_MEMORY);
 }
 
+static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
+{
+u16 control = pci_conf_read16(dev-seg, dev-bus,
+  PCI_SLOT(dev-devfn),
+  PCI_FUNC(dev-devfn),
+  msix_control_reg(pos));
+
+if ( !(control  PCI_MSIX_FLAGS_ENABLE) )
+{//temp
+ static bool_t warned;
+ WARN_ON(!test_and_set_bool(warned));
+return 0;
+}
+
+return memory_decoded(dev);
+}
+
 /*
  * MSI message composition
  */
@@ -219,7 +236,8 @@ static bool_t read_msi_msg(struct msi_de
 void __iomem *base;
 base = entry-mask_base;
 
-if ( unlikely(!memory_decoded(entry-dev)) )
+if ( unlikely(!msix_memory_decoded(entry-dev,
+   entry-msi_attrib.pos)) )
 return 0;
 msg-address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
 msg-address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -285,7 +303,8 @@ static int write_msi_msg(struct msi_desc
 void __iomem *base;
 base = entry-mask_base;
 
-if ( unlikely(!memory_decoded(entry-dev)) )
+if ( unlikely(!msix_memory_decoded(entry-dev,
+   entry-msi_attrib.pos)) )
 return -ENXIO;
 writel(msg-address_lo,
base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
@@ -379,7 +398,7 @@ static bool_t msi_set_mask_bit(struct ir
 {
 struct msi_desc *entry = desc-msi_desc;
 struct pci_dev *pdev;
-u16 seg;
+u16 seg, control;
 u8 bus, slot, func;
 
 ASSERT(spin_is_locked(desc-lock));
@@ -401,35 +420,38 @@ static bool_t msi_set_mask_bit(struct ir
 }
 break;
 case PCI_CAP_ID_MSIX:
+control = pci_conf_read16(seg, bus, slot, func,
+  msix_control_reg(entry-msi_attrib.pos));
+if ( unlikely(!(control  PCI_MSIX_FLAGS_ENABLE)) )
+pci_conf_write16(seg, bus, slot, func,
+ msix_control_reg(entry-msi_attrib.pos),
+ control | (PCI_MSIX_FLAGS_ENABLE |
+PCI_MSIX_FLAGS_MASKALL));
 if ( likely(memory_decoded(pdev)) )
 {
 writel(flag, entry-mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 readl(entry-mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-break;
+if ( likely(control  PCI_MSIX_FLAGS_ENABLE) )
+break;
+flag = 1;
 }
-if ( flag )
+