[PATCH v3 net 0/2 RESEND] net: ixgbe: Use new flag to disable Relaxed Ordering

2017-08-20 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

The ixgbe driver could use this flag to determine if it can
send TLPs to Root Port with the Relaxed Ordering Attributes set.

v2: Simplify the original approach according to Alex's suggestion:
remove the new ixgbe flag2 and only check bit 4 in the
PCIe Device Control register.

v3: Remove the code that clears the bits in DCA_T/RXCTRL; relaxed
ordering should be enabled by the HW when the bus allows it.

Ding Tianhong (2):
  Revert commit 1a8b6d76dc5b ("net:add one common config...")
  net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

 arch/Kconfig|  3 --
 arch/sparc/Kconfig  |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 4 files changed, 35 insertions(+), 38 deletions(-)

-- 
1.8.3.1




[PATCH v3 net 2/2 RESEND] net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-20 Thread Ding Tianhong
The ixgbe driver uses a compile-time check to determine whether it
can send TLPs to the Root Port with the Relaxed Ordering attribute
set, which is inconvenient. Now that the new flag
PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added to the kernel, we can
check bit 4 in the PCIe Device Control register to determine whether
Relaxed Ordering should be used, so use this new approach in the
ixgbe driver.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 22 --
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 19 ---
 2 files changed, 41 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 523f9d0..8a32eb7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -175,31 +175,9 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
  **/
 static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
 {
-#ifndef CONFIG_SPARC
-   u32 regval;
-   u32 i;
-#endif
s32 ret_val;
 
ret_val = ixgbe_start_hw_generic(hw);
-
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; ((i < hw->mac.max_tx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
-   }
-
-   for (i = 0; ((i < hw->mac.max_rx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
-   }
-#endif
if (ret_val)
return ret_val;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index d4933d2..96c324f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,25 +350,6 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; i < hw->mac.max_tx_queues; i++) {
-   u32 regval;
-
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
-   }
-
-   for (i = 0; i < hw->mac.max_rx_queues; i++) {
-   u32 regval;
-
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
-   }
-#endif
return 0;
 }
 
-- 
1.8.3.1




[PATCH v3 net 1/2 RESEND] Revert commit 1a8b6d76dc5b ("net:add one common config...")

2017-08-20 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

With this new flag, we no longer need the config ARCH_WANT_RELAX_ORDER
to control the Relaxed Ordering attribute for the ixgbe driver,
as commit 1a8b6d76dc5b ("net:add one common config...") did,
so revert that commit.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 arch/Kconfig| 3 ---
 arch/sparc/Kconfig  | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089..00cfc63 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -928,9 +928,6 @@ config STRICT_MODULE_RWX
  and non-text memory will be made non-executable. This provides
  protection against certain security exploits (e.g. writing to text)
 
-config ARCH_WANT_RELAX_ORDER
-   bool
-
 config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a4a6261..987a575 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -44,7 +44,6 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
-   select ARCH_WANT_RELAX_ORDER
 
 config SPARC32
def_bool !64BIT
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e70..d4933d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,7 +350,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_ARCH_WANT_RELAX_ORDER
+#ifndef CONFIG_SPARC
/* Disable relaxed ordering */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
u32 regval;
-- 
1.8.3.1




[PATCH v3 net 0/2] net: ixgbe: Use new flag to disable Relaxed Ordering

2017-08-18 Thread Ding Tianhong
From: Mao Wenan <maowe...@huawei.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

The ixgbe driver could use this flag to determine if it can
send TLPs to Root Port with the Relaxed Ordering Attributes set.

v2: Simplify the original approach according to Alex's suggestion:
remove the new ixgbe flag2 and only check bit 4 in the
PCIe Device Control register.

v3: Remove the code that clears the bits in DCA_T/RXCTRL; relaxed
ordering should be enabled by the HW when the bus allows it.

Ding Tianhong (2):
  Revert commit 1a8b6d76dc5b ("net:add one common config...")
  net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

 arch/Kconfig|  3 --
 arch/sparc/Kconfig  |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 4 files changed, 35 insertions(+), 38 deletions(-)

-- 
1.8.3.1




[PATCH v3 net 2/2] net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-18 Thread Ding Tianhong
The ixgbe driver uses a compile-time check to determine whether it
can send TLPs to the Root Port with the Relaxed Ordering attribute
set, which is inconvenient. Now that the new flag
PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added to the kernel, we can
check bit 4 in the PCIe Device Control register to determine whether
Relaxed Ordering should be used, so use this new approach in the
ixgbe driver.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 22 --
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 19 ---
 2 files changed, 41 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 523f9d0..8a32eb7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -175,31 +175,9 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
  **/
 static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
 {
-#ifndef CONFIG_SPARC
-   u32 regval;
-   u32 i;
-#endif
s32 ret_val;
 
ret_val = ixgbe_start_hw_generic(hw);
-
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; ((i < hw->mac.max_tx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
-   }
-
-   for (i = 0; ((i < hw->mac.max_rx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
-   }
-#endif
if (ret_val)
return ret_val;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index d4933d2..96c324f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,25 +350,6 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; i < hw->mac.max_tx_queues; i++) {
-   u32 regval;
-
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
-   }
-
-   for (i = 0; i < hw->mac.max_rx_queues; i++) {
-   u32 regval;
-
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
-   }
-#endif
return 0;
 }
 
-- 
1.8.3.1




[PATCH v3 net 1/2] Revert commit 1a8b6d76dc5b ("net:add one common config...")

2017-08-18 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

With this new flag, we no longer need the config ARCH_WANT_RELAX_ORDER
to control the Relaxed Ordering attribute for the ixgbe driver,
as commit 1a8b6d76dc5b ("net:add one common config...") did,
so revert that commit.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 arch/Kconfig| 3 ---
 arch/sparc/Kconfig  | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089..00cfc63 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -928,9 +928,6 @@ config STRICT_MODULE_RWX
  and non-text memory will be made non-executable. This provides
  protection against certain security exploits (e.g. writing to text)
 
-config ARCH_WANT_RELAX_ORDER
-   bool
-
 config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a4a6261..987a575 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -44,7 +44,6 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
-   select ARCH_WANT_RELAX_ORDER
 
 config SPARC32
def_bool !64BIT
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e70..d4933d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,7 +350,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_ARCH_WANT_RELAX_ORDER
+#ifndef CONFIG_SPARC
/* Disable relaxed ordering */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
u32 regval;
-- 
1.8.3.1




Re: [PATCH net v2 2/2] net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-17 Thread Ding Tianhong


On 2017/8/18 13:04, Tantilov, Emil S wrote:
>> -Original Message-
>> From: Ding Tianhong [mailto:dingtianh...@huawei.com]
>> Sent: Thursday, August 17, 2017 5:39 PM
>> To: Tantilov, Emil S <emil.s.tanti...@intel.com>; da...@davemloft.net;
>> Kirsher, Jeffrey T <jeffrey.t.kirs...@intel.com>; keesc...@chromium.org;
>> linux-ker...@vger.kernel.org; sparcli...@vger.kernel.org; intel-wired-
>> l...@lists.osuosl.org; alexander.du...@gmail.com; netdev@vger.kernel.org;
>> linux...@huawei.com
>> Subject: Re: [PATCH net v2 2/2] net: ixgbe: Use new
>> PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
>>
>>
>>
>> On 2017/8/17 22:17, Tantilov, Emil S wrote:
>>
>>>>ret_val = ixgbe_start_hw_generic(hw);
>>>>
>>>> -#ifndef CONFIG_SPARC
>>>> -  /* Disable relaxed ordering */
>>>> -  for (i = 0; ((i < hw->mac.max_tx_queues) &&
>>>> -   (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
>>>> -  regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
>>>> -  regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
>>>> -  IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
>>>> -  }
>>>> +  if (!pcie_relaxed_ordering_enabled(adapter->pdev)) {
>>>
>>> As Alex mentioned there is no need for this check in any form.
>>>
>>> The HW defaults to Relaxed Ordering enabled unless it is disabled in
>>> the PCIe Device Control Register. So the above logic is already done by
>> HW.
>>>
>>> All you have to do is strip the code disabling relaxed ordering.
>>>
>>
>> Hi Tantilov:
>>
>> I misunderstood Alex's suggestion, But I still couldn't find the logic
>> where
>> the HW disable the Relaxed Ordering when the PCIe Device Control Register
>> disable it, can you point it out?
> 
> If you look at the datasheet (82599) - the description of CTRL_EXT.RO_DIS 
> (bit 17, 0b):
> 
> Relaxed Ordering Disable. When set to 1b, the device does not request any 
> relaxed
> ordering transactions. When this bit is cleared and the Enable Relaxed 
> Ordering bit in
> the Device Control register is set, the device requests relaxed ordering 
> transactions per queues as configured in the DCA_RXCTRL[n] and DCA_TXCTRL[n] 
> registers.
> 
> So if you remove the code that clears the bits in DCA_T/RXCTRL relaxed 
> ordering should
> be enabled by HW when the bus allows it.
> 

Great, Thanks for your explanation.

> Thanks,
> Emil
> 
> 
> .
> 



Re: [PATCH net v2 2/2] net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-17 Thread Ding Tianhong


On 2017/8/17 22:17, Tantilov, Emil S wrote:

>>  ret_val = ixgbe_start_hw_generic(hw);
>>
>> -#ifndef CONFIG_SPARC
>> -/* Disable relaxed ordering */
>> -for (i = 0; ((i < hw->mac.max_tx_queues) &&
>> - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
>> -regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
>> -regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
>> -IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
>> -}
>> +if (!pcie_relaxed_ordering_enabled(adapter->pdev)) {
> 
> As Alex mentioned there is no need for this check in any form.
> 
> The HW defaults to Relaxed Ordering enabled unless it is disabled in 
> the PCIe Device Control Register. So the above logic is already done by HW.
> 
> All you have to do is strip the code disabling relaxed ordering.
> 

Hi Tantilov:

I misunderstood Alex's suggestion, but I still couldn't find the logic where
the HW disables Relaxed Ordering when the PCIe Device Control Register
disables it. Can you point it out?

Thanks
Ding

> Thanks,
> Emil
> 
> 
> .
> 



Re: [PATCH net] PCI: fix the return value for the pci_find_pcie_root_port()

2017-08-17 Thread Ding Tianhong


On 2017/8/17 21:30, Thierry Reding wrote:
> On Thu, Aug 17, 2017 at 08:40:16PM +0800, Ding Tianhong wrote:
>>
>>
>> On 2017/8/17 18:51, Thierry Reding wrote:
>>> On Thu, Aug 17, 2017 at 10:25:30AM +0800, Ding Tianhong wrote:
>>>> The pci_find_pcie_root_port() would return NULL if the given
>>>> dev is already a Root Port, it looks like unfriendly to the
>>>> PCIe Root Port device, Thierry and Bjorn suggest to let this
>>>> function return the given dev under this circumstances.
>>>>
>>>> Fixes: 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI 
>>>> device")
>>>> Suggested-by: Thierry Reding <thierry.red...@gmail.com>
>>>> Suggested-by: Bjorn Helgaas <helg...@kernel.org>
>>>> Signed-off-by: Thierry Reding <thierry.red...@gmail.com>
>>>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>>>> ---
>>>>  drivers/pci/pci.c | 2 +-
>>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>>> index 7e2022f..352bb53 100644
>>>> --- a/drivers/pci/pci.c
>>>> +++ b/drivers/pci/pci.c
>>>> @@ -514,7 +514,7 @@ struct resource *pci_find_resource(struct pci_dev 
>>>> *dev, struct resource *res)
>>>>   */
>>>>  struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev)
>>>>  {
>>>> -  struct pci_dev *bridge, *highest_pcie_bridge = NULL;
>>>> +  struct pci_dev *bridge, *highest_pcie_bridge = dev;
>>>>  
>>>>bridge = pci_upstream_bridge(dev);
>>>>while (bridge && pci_is_pcie(bridge)) {
>>>
>>> I think this should actually be this change on top of a revert of commit
>>> 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI
>>> device"). After the above change, the previous fix will have a redundant
>>> check because highest_pcie_bridge will never be NULL.
>>>
>>> Let me send out that version to clarify what I mean.
>>>
>>
>> Hi Thierry:
>>
>> The patch ("PCI: fix oops when try to find Root Port for a PCI device")
>> has been merge to the linus mainline tree before you found this 
>> deficiencies
> 
> I understand that. I'm just saying that there's no point keeping that
> change around because it no longer makes sense after we initialize the
> highest_pcie_bridge variable to dev.
> 

Ok, NO problem.:)

> Thierry
> 



Re: [PATCH net] PCI: fix the return value for the pci_find_pcie_root_port()

2017-08-17 Thread Ding Tianhong


On 2017/8/17 18:51, Thierry Reding wrote:
> On Thu, Aug 17, 2017 at 10:25:30AM +0800, Ding Tianhong wrote:
>> The pci_find_pcie_root_port() would return NULL if the given
>> dev is already a Root Port, it looks like unfriendly to the
>> PCIe Root Port device, Thierry and Bjorn suggest to let this
>> function return the given dev under this circumstances.
>>
>> Fixes: 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI 
>> device")
>> Suggested-by: Thierry Reding <thierry.red...@gmail.com>
>> Suggested-by: Bjorn Helgaas <helg...@kernel.org>
>> Signed-off-by: Thierry Reding <thierry.red...@gmail.com>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/pci.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 7e2022f..352bb53 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -514,7 +514,7 @@ struct resource *pci_find_resource(struct pci_dev *dev, 
>> struct resource *res)
>>   */
>>  struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev)
>>  {
>> -struct pci_dev *bridge, *highest_pcie_bridge = NULL;
>> +struct pci_dev *bridge, *highest_pcie_bridge = dev;
>>  
>>  bridge = pci_upstream_bridge(dev);
>>  while (bridge && pci_is_pcie(bridge)) {
> 
> I think this should actually be this change on top of a revert of commit
> 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI
> device"). After the above change, the previous fix will have a redundant
> check because highest_pcie_bridge will never be NULL.
> 
> Let me send out that version to clarify what I mean.
> 

Hi Thierry:

The patch ("PCI: fix oops when try to find Root Port for a PCI device")
had already been merged into Linus's mainline tree before you found this
deficiency.

Regards
Tianhong

> Thierry
> 



[PATCH net v2 0/2] net: ixgbe: Use new flag to disable Relaxed Ordering

2017-08-16 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

The ixgbe driver could use this flag to determine if it can
send TLPs to Root Port with the Relaxed Ordering Attributes set.

v2: Simplify the original approach according to Alex's suggestion:
remove the new ixgbe flag2 and only check bit 4 in the
PCIe Device Control register.

Ding Tianhong (2):
  Revert commit 1a8b6d76dc5b ("net:add one common config...")
  net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

 arch/Kconfig|  3 --
 arch/sparc/Kconfig  |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 4 files changed, 35 insertions(+), 38 deletions(-)

-- 
1.8.3.1




[PATCH net v2 2/2] net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-16 Thread Ding Tianhong
The ixgbe driver uses a compile-time check to determine whether it
can send TLPs to the Root Port with the Relaxed Ordering attribute
set, which is inconvenient. Now that the new flag
PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added to the kernel, we can
check bit 4 in the PCIe Device Control register to determine whether
Relaxed Ordering should be used, so use this new approach in the
ixgbe driver.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 2 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 523f9d0..d1571e3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -175,31 +175,30 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
  **/
 static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
 {
-#ifndef CONFIG_SPARC
-   u32 regval;
-   u32 i;
-#endif
+   u32 regval, i;
s32 ret_val;
+   struct ixgbe_adapter *adapter = hw->back;
 
ret_val = ixgbe_start_hw_generic(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; ((i < hw->mac.max_tx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
-   }
+   if (!pcie_relaxed_ordering_enabled(adapter->pdev)) {
+   /* Disable relaxed ordering */
+   for (i = 0; ((i < hw->mac.max_tx_queues) &&
+(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
+   }
 
-   for (i = 0; ((i < hw->mac.max_rx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+   for (i = 0; ((i < hw->mac.max_rx_queues) &&
+(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+   }
}
-#endif
+
if (ret_val)
return ret_val;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index d4933d2..d1052ee 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -342,6 +342,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
 {
u32 i;
+   struct ixgbe_adapter *adapter = hw->back;
 
/* Clear the rate limiters */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
@@ -350,25 +351,26 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; i < hw->mac.max_tx_queues; i++) {
-   u32 regval;
+   if (!pcie_relaxed_ordering_enabled(adapter->pdev)) {
+   /* Disable relaxed ordering */
+   for (i = 0; i < hw->mac.max_tx_queues; i++) {
+   u32 regval;
 
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
-   }
+   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
+   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
+   }
 
-   for (i = 0; i < hw->mac.max_rx_queues; i++) {
-   u32 regval;
+   for (i = 0; i < hw->mac.max_rx_queues; i++) {
+   u32 regval;
 
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), reg

[PATCH net v2 1/2] Revert commit 1a8b6d76dc5b ("net:add one common config...")

2017-08-16 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

With this new flag, we no longer need the config ARCH_WANT_RELAX_ORDER
to control the Relaxed Ordering attribute for the ixgbe driver,
as commit 1a8b6d76dc5b ("net:add one common config...") did,
so revert that commit.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 arch/Kconfig| 3 ---
 arch/sparc/Kconfig  | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089..00cfc63 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -928,9 +928,6 @@ config STRICT_MODULE_RWX
  and non-text memory will be made non-executable. This provides
  protection against certain security exploits (e.g. writing to text)
 
-config ARCH_WANT_RELAX_ORDER
-   bool
-
 config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a4a6261..987a575 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -44,7 +44,6 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
-   select ARCH_WANT_RELAX_ORDER
 
 config SPARC32
def_bool !64BIT
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e70..d4933d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,7 +350,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_ARCH_WANT_RELAX_ORDER
+#ifndef CONFIG_SPARC
/* Disable relaxed ordering */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
u32 regval;
-- 
1.8.3.1




[PATCH net] PCI: fix the return value for the pci_find_pcie_root_port()

2017-08-16 Thread Ding Tianhong
pci_find_pcie_root_port() returns NULL if the given dev is already
a Root Port, which is unfriendly to PCIe Root Port devices.
Thierry and Bjorn suggested letting this function return the given
dev under these circumstances.

Fixes: 0e405232871d6 ("PCI: fix oops when try to find Root Port for a PCI device")
Suggested-by: Thierry Reding <thierry.red...@gmail.com>
Suggested-by: Bjorn Helgaas <helg...@kernel.org>
Signed-off-by: Thierry Reding <thierry.red...@gmail.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7e2022f..352bb53 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -514,7 +514,7 @@ struct resource *pci_find_resource(struct pci_dev *dev, 
struct resource *res)
  */
 struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev)
 {
-   struct pci_dev *bridge, *highest_pcie_bridge = NULL;
+   struct pci_dev *bridge, *highest_pcie_bridge = dev;
 
bridge = pci_upstream_bridge(dev);
while (bridge && pci_is_pcie(bridge)) {
-- 
1.8.3.1




Re: [PATCH net 0/2] net: ixgbe: Use new flag to disable Relaxed Ordering

2017-08-16 Thread Ding Tianhong


On 2017/8/17 1:56, David Miller wrote:
> From: Ding Tianhong <dingtianh...@huawei.com>
> Date: Wed, 16 Aug 2017 17:41:45 +0800
> 
>> The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
>> to indicate that Relaxed Ordering Attributes (RO) should not
>> be used for Transaction Layer Packets (TLP) targeted toward
>> these affected Root Port, it will clear the bit4 in the PCIe
>> Device Control register, so the PCIe device drivers could
>> query PCIe configuration space to determine if it can send
>> TLPs to Root Port with the Relaxed Ordering Attributes set.
>>
>> The ixgbe driver could use this flag to determine if it can
>> send TLPs to Root Port with the Relaxed Ordering Attributes set.
> 
> I'll let the Intel guys pick this up.
> 
Thanks David, but I am not sure when the Intel guys will pick it up;
so far only Alex has replied, so I will send a new version according to
Alex's suggestion.

> .
> 



Re: [PATCH net RESEND] PCI: fix oops when try to find Root Port for a PCI device

2017-08-16 Thread Ding Tianhong


On 2017/8/17 4:59, David Miller wrote:
> From: Bjorn Helgaas 
> Date: Wed, 16 Aug 2017 15:02:37 -0500
> 
>> Your fix looks right to me.
> 
> Someone please submit this fix formally because this change is now in
> Linus's tree.
> 

I will send it.

> Thank you.
> 
> .
> 



[PATCH net 1/2] Revert commit 1a8b6d76dc5b ("net:add one common config...")

2017-08-16 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that the Relaxed Ordering (RO) attribute should not
be used for Transaction Layer Packets (TLPs) targeted toward
the affected Root Ports. When the flag is set, bit 4 (Enable
Relaxed Ordering) in the PCIe Device Control register is cleared,
so PCIe device drivers can query PCIe configuration space to
determine whether they may send TLPs to the Root Port with the
Relaxed Ordering attribute set.

With this new flag, we no longer need the config ARCH_WANT_RELAX_ORDER
to control the Relaxed Ordering attribute for the ixgbe driver,
as commit 1a8b6d76dc5b ("net:add one common config...") did,
so revert that commit.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 arch/Kconfig| 3 ---
 arch/sparc/Kconfig  | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089..00cfc63 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -928,9 +928,6 @@ config STRICT_MODULE_RWX
  and non-text memory will be made non-executable. This provides
  protection against certain security exploits (e.g. writing to text)
 
-config ARCH_WANT_RELAX_ORDER
-   bool
-
 config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a4a6261..987a575 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -44,7 +44,6 @@ config SPARC
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
select LOCKDEP_SMALL if LOCKDEP
-   select ARCH_WANT_RELAX_ORDER
 
 config SPARC32
def_bool !64BIT
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e70..d4933d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -350,7 +350,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_ARCH_WANT_RELAX_ORDER
+#ifndef CONFIG_SPARC
/* Disable relaxed ordering */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
u32 regval;
-- 
1.8.3.1




[PATCH net 0/2] net: ixgbe: Use new flag to disable Relaxed Ordering

2017-08-16 Thread Ding Tianhong
The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added
to indicate that Relaxed Ordering Attributes (RO) should not
be used for Transaction Layer Packets (TLP) targeted toward
the affected Root Ports. For devices below such a Root Port, bit 4
in the PCIe Device Control register is cleared, so PCIe device drivers
can query PCIe configuration space to determine whether they can send
TLPs to the Root Port with the Relaxed Ordering Attribute set.

The ixgbe driver could use this flag to determine if it can
send TLPs to Root Port with the Relaxed Ordering Attributes set.

Ding Tianhong (2):
  Revert commit 1a8b6d76dc5b ("net:add one common config...")
  net: ixgbe: Use new IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING flag

 arch/Kconfig|  3 --
 arch/sparc/Kconfig  |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe.h|  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   | 17 
 6 files changed, 53 insertions(+), 38 deletions(-)

-- 
1.8.3.1




[PATCH net 2/2] net: ixgbe: Use new IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING flag

2017-08-16 Thread Ding Tianhong
The ixgbe driver uses a compile-time check to determine whether it can
send TLPs to the Root Port with the Relaxed Ordering Attribute set,
which is inconvenient. Now that the new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING
has been added to the kernel, we can check bit 4 in the PCIe
Device Control register to determine whether we should use the Relaxed
Ordering Attribute or not. So add a new flag called
IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING to the ixgbe driver; it will
be set if the Root Port cannot handle upstream TLPs with the Relaxed
Ordering Attribute set, so that the driver knows what to do next.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h|  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c  | 37 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 32 +++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   | 17 
 4 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index dd55787..50e0553 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -621,6 +621,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_EEE_CAPABLEBIT(14)
 #define IXGBE_FLAG2_EEE_ENABLEDBIT(15)
 #define IXGBE_FLAG2_RX_LEGACY  BIT(16)
+#define IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING   BIT(17)
 
/* Tx fast path data */
int num_tx_queues;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 523f9d0..0727a30 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -175,31 +175,30 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
  **/
 static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
 {
-#ifndef CONFIG_SPARC
-   u32 regval;
-   u32 i;
-#endif
+   u32 regval, i;
s32 ret_val;
+   struct ixgbe_adapter *adapter = hw->back;
 
ret_val = ixgbe_start_hw_generic(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; ((i < hw->mac.max_tx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
-   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
-   }
+   if (adapter->flags2 & IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING) {
+   /* Disable relaxed ordering */
+   for (i = 0; ((i < hw->mac.max_tx_queues) &&
+(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+   regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+   regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+   IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
+   }
 
-   for (i = 0; ((i < hw->mac.max_rx_queues) &&
-(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
-   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
-   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
-   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
-   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+   for (i = 0; ((i < hw->mac.max_rx_queues) &&
+(i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+   regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+   regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+   IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+   IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+   }
}
-#endif
+
if (ret_val)
return ret_val;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index d4933d2..2473c0b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -342,6 +342,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
 {
u32 i;
+   struct ixgbe_adapter *adapter = hw->back;
 
/* Clear the rate limiters */
for (i = 0; i < hw->mac.max_tx_queues; i++) {
@@ -350,25 +351,26 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
}
IXGBE_WRITE_FLUSH(hw);
 
-#ifndef CONFIG_SPARC
-   /* Disable relaxed ordering */
-   for (i = 0; i < hw->mac.max_tx_queues; i++) {
-   u32 regval;
+   if (adapter->flags2 & IXGBE_FLAG2_ROOT_NO_RELAXED_ORDERING) {
+   /* Disable relaxed ordering */
+   for (i = 0; i < hw->mac.max_tx_queues; i++) {
+   u32 regval;
 

[PATCH net RESEND] PCI: fix oops when try to find Root Port for a PCI device

2017-08-15 Thread Ding Tianhong
Eric reported an oops when booting the system after applying
commit a99b646afa8a ("PCI: Disable PCIe Relaxed..."):

[4.241029] BUG: unable to handle kernel NULL pointer dereference at 
0050
[4.247001] IP: pci_find_pcie_root_port+0x62/0x80
[4.253011] PGD 0
[4.253011] P4D 0
[4.253011]
[4.258013] Oops:  [#1] SMP DEBUG_PAGEALLOC
[4.262015] Modules linked in:
[4.265005] CPU: 31 PID: 1 Comm: swapper/0 Not tainted 4.13.0-dbx-DEV #316
[4.271002] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.40.0 06/22/2016
[4.279002] task: a2ee38cfa040 task.stack: a51ec0004000
[4.285001] RIP: 0010:pci_find_pcie_root_port+0x62/0x80
[4.290012] RSP: :a51ec0007ab8 EFLAGS: 00010246
[4.295003] RAX:  RBX: a2ee36bae000 RCX: 0006
[4.303002] RDX: 081c RSI: a2ee38cfa8c8 RDI: a2ee36bae000
[4.310013] RBP: a51ec0007b58 R08: 0001 R09: 
[4.317001] R10:  R11:  R12: a51ec0007ad0
[4.324005] R13: a2ee36bae098 R14: 0002 R15: a2ee37204818
[4.331002] FS:  () GS:a2ee3fcc() 
knlGS:
[4.339002] CS:  0010 DS:  ES:  CR0: 80050033
[4.345001] CR2: 0050 CR3: 00401000f000 CR4: 001406e0
[4.351002] Call Trace:
[4.354012]  ? pci_configure_device+0x19f/0x570
[4.359002]  ? pci_conf1_read+0xb8/0xf0
[4.363002]  ? raw_pci_read+0x23/0x40
[4.366011]  ? pci_read+0x2c/0x30
[4.370014]  ? pci_read_config_word+0x67/0x70
[4.374012]  pci_device_add+0x28/0x230
[4.378012]  ? pci_vpd_f0_read+0x50/0x80
[4.382014]  pci_scan_single_device+0x96/0xc0
[4.386012]  pci_scan_slot+0x79/0xf0
[4.389001]  pci_scan_child_bus+0x31/0x180
[4.394014]  acpi_pci_root_create+0x1c6/0x240
[4.398013]  pci_acpi_scan_root+0x15f/0x1b0
[4.402012]  acpi_pci_root_add+0x2e6/0x400
[4.406012]  ? acpi_evaluate_integer+0x37/0x60
[4.411002]  acpi_bus_attach+0xdf/0x200
[4.415002]  acpi_bus_attach+0x6a/0x200
[4.418014]  acpi_bus_attach+0x6a/0x200
[4.422013]  acpi_bus_scan+0x38/0x70
[4.426011]  acpi_scan_init+0x10c/0x271
[4.429001]  acpi_init+0x2fa/0x348
[4.433004]  ? acpi_sleep_proc_init+0x2d/0x2d
[4.437001]  do_one_initcall+0x43/0x169
[4.441001]  kernel_init_freeable+0x1d0/0x258
[4.445003]  ? rest_init+0xe0/0xe0
[4.449001]  kernel_init+0xe/0x150

== cut here =

It looks like pci_find_pcie_root_port() was trying to
find the Root Port for a PCI device which is itself the Root
Port; no upstream bridge is found, so highest_pcie_bridge is NULL and
dereferencing it triggers the oops. Check highest_pcie_bridge to fix this problem.

Fixes: a99b646afa8a ("PCI: Disable PCIe Relaxed Ordering if unsupported")
Reported-by: Eric Dumazet <eric.duma...@gmail.com>
Signed-off-by: Eric Dumazet <eric.duma...@gmail.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc34..7e2022f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -522,7 +522,8 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev)
bridge = pci_upstream_bridge(bridge);
}
 
-   if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT)
+   if (highest_pcie_bridge &&
+   pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT)
return NULL;
 
return highest_pcie_bridge;
-- 
1.8.3.1




[PATCH net] pci: fix oops when try to find Root Port for a PCI device

2017-08-15 Thread Ding Tianhong
Eric reported an oops when booting the system after applying
commit a99b646afa8a ("PCI: Disable PCIe Relaxed..."):

[4.241029] BUG: unable to handle kernel NULL pointer dereference at 
0050
[4.247001] IP: pci_find_pcie_root_port+0x62/0x80
[4.253011] PGD 0
[4.253011] P4D 0
[4.253011]
[4.258013] Oops:  [#1] SMP DEBUG_PAGEALLOC
[4.262015] Modules linked in:
[4.265005] CPU: 31 PID: 1 Comm: swapper/0 Not tainted 4.13.0-dbx-DEV #316
[4.271002] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.40.0 06/22/2016
[4.279002] task: a2ee38cfa040 task.stack: a51ec0004000
[4.285001] RIP: 0010:pci_find_pcie_root_port+0x62/0x80
[4.290012] RSP: :a51ec0007ab8 EFLAGS: 00010246
[4.295003] RAX:  RBX: a2ee36bae000 RCX: 0006
[4.303002] RDX: 081c RSI: a2ee38cfa8c8 RDI: a2ee36bae000
[4.310013] RBP: a51ec0007b58 R08: 0001 R09: 
[4.317001] R10:  R11:  R12: a51ec0007ad0
[4.324005] R13: a2ee36bae098 R14: 0002 R15: a2ee37204818
[4.331002] FS:  () GS:a2ee3fcc() 
knlGS:
[4.339002] CS:  0010 DS:  ES:  CR0: 80050033
[4.345001] CR2: 0050 CR3: 00401000f000 CR4: 001406e0
[4.351002] Call Trace:
[4.354012]  ? pci_configure_device+0x19f/0x570
[4.359002]  ? pci_conf1_read+0xb8/0xf0
[4.363002]  ? raw_pci_read+0x23/0x40
[4.366011]  ? pci_read+0x2c/0x30
[4.370014]  ? pci_read_config_word+0x67/0x70
[4.374012]  pci_device_add+0x28/0x230
[4.378012]  ? pci_vpd_f0_read+0x50/0x80
[4.382014]  pci_scan_single_device+0x96/0xc0
[4.386012]  pci_scan_slot+0x79/0xf0
[4.389001]  pci_scan_child_bus+0x31/0x180
[4.394014]  acpi_pci_root_create+0x1c6/0x240
[4.398013]  pci_acpi_scan_root+0x15f/0x1b0
[4.402012]  acpi_pci_root_add+0x2e6/0x400
[4.406012]  ? acpi_evaluate_integer+0x37/0x60
[4.411002]  acpi_bus_attach+0xdf/0x200
[4.415002]  acpi_bus_attach+0x6a/0x200
[4.418014]  acpi_bus_attach+0x6a/0x200
[4.422013]  acpi_bus_scan+0x38/0x70
[4.426011]  acpi_scan_init+0x10c/0x271
[4.429001]  acpi_init+0x2fa/0x348
[4.433004]  ? acpi_sleep_proc_init+0x2d/0x2d
[4.437001]  do_one_initcall+0x43/0x169
[4.441001]  kernel_init_freeable+0x1d0/0x258
[4.445003]  ? rest_init+0xe0/0xe0
[4.449001]  kernel_init+0xe/0x150

== cut here =

It looks like pci_find_pcie_root_port() was trying to
find the Root Port for a PCI device which is itself the Root
Port; no upstream bridge is found, so highest_pcie_bridge is NULL and
dereferencing it triggers the oops. Check highest_pcie_bridge to fix this problem.

Fixes: a99b646afa8a ("PCI: Disable PCIe Relaxed Ordering if unsupported")
Reported-by: Eric Dumazet <eric.duma...@gmail.com>
Signed-off-by: Eric Dumazet <eric.duma...@gmail.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc34..7e2022f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -522,7 +522,8 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev)
bridge = pci_upstream_bridge(bridge);
}
 
-   if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT)
+   if (highest_pcie_bridge &&
+   pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT)
return NULL;
 
return highest_pcie_bridge;
-- 
1.8.3.1




Re: [PATCH v11 0/5] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-15 Thread Ding Tianhong


On 2017/8/15 22:03, Eric Dumazet wrote:
> On Tue, 2017-08-15 at 06:58 -0700, Eric Dumazet wrote:
>> On Mon, 2017-08-14 at 22:15 -0700, David Miller wrote:
>>> From: Ding Tianhong <dingtianh...@huawei.com>
>>> Date: Tue, 15 Aug 2017 11:23:22 +0800
>>>
>>>> Some devices have problems with Transaction Layer Packets with the Relaxed
>>>> Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
>>>> PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
>>>> devices with Relaxed Ordering issues, and a use of this new flag by the
>>>> cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
>>>> Ports.
>>>  ...
>>>
>>> Series applied, thanks.
>>
>> I got a NULL deref in pci_find_pcie_root_port()
>>
> 
> This was :
> 
> [4.241029] BUG: unable to handle kernel NULL pointer dereference at 
> 0050
> [4.247001] IP: pci_find_pcie_root_port+0x62/0x80
> [4.253011] PGD 0 
> [4.253011] P4D 0 
> [4.253011] 
> [4.258013] Oops:  [#1] SMP DEBUG_PAGEALLOC
> [4.262015] Modules linked in:
> [4.265005] CPU: 31 PID: 1 Comm: swapper/0 Not tainted 4.13.0-dbx-DEV #316
> [4.271002] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.40.0 06/22/2016
> [4.279002] task: a2ee38cfa040 task.stack: a51ec0004000
> [4.285001] RIP: 0010:pci_find_pcie_root_port+0x62/0x80
> [4.290012] RSP: :a51ec0007ab8 EFLAGS: 00010246
> [4.295003] RAX:  RBX: a2ee36bae000 RCX: 
> 0006
> [4.303002] RDX: 081c RSI: a2ee38cfa8c8 RDI: 
> a2ee36bae000
> [4.310013] RBP: a51ec0007b58 R08: 0001 R09: 
> 
> [4.317001] R10:  R11:  R12: 
> a51ec0007ad0
> [4.324005] R13: a2ee36bae098 R14: 0002 R15: 
> a2ee37204818
> [4.331002] FS:  () GS:a2ee3fcc() 
> knlGS:
> [4.339002] CS:  0010 DS:  ES:  CR0: 80050033
> [4.345001] CR2: 0050 CR3: 00401000f000 CR4: 
> 001406e0
> [4.351002] Call Trace:
> [4.354012]  ? pci_configure_device+0x19f/0x570
> [4.359002]  ? pci_conf1_read+0xb8/0xf0
> [4.363002]  ? raw_pci_read+0x23/0x40
> [4.366011]  ? pci_read+0x2c/0x30
> [4.370014]  ? pci_read_config_word+0x67/0x70
> [4.374012]  pci_device_add+0x28/0x230
> [4.378012]  ? pci_vpd_f0_read+0x50/0x80
> [4.382014]  pci_scan_single_device+0x96/0xc0
> [4.386012]  pci_scan_slot+0x79/0xf0
> [4.389001]  pci_scan_child_bus+0x31/0x180
> [4.394014]  acpi_pci_root_create+0x1c6/0x240
> [4.398013]  pci_acpi_scan_root+0x15f/0x1b0
> [4.402012]  acpi_pci_root_add+0x2e6/0x400
> [4.406012]  ? acpi_evaluate_integer+0x37/0x60
> [4.411002]  acpi_bus_attach+0xdf/0x200
> [4.415002]  acpi_bus_attach+0x6a/0x200
> [4.418014]  acpi_bus_attach+0x6a/0x200
> [4.422013]  acpi_bus_scan+0x38/0x70
> [4.426011]  acpi_scan_init+0x10c/0x271
> [4.429001]  acpi_init+0x2fa/0x348
> [4.433004]  ? acpi_sleep_proc_init+0x2d/0x2d
> [4.437001]  do_one_initcall+0x43/0x169
> [4.441001]  kernel_init_freeable+0x1d0/0x258
> [4.445003]  ? rest_init+0xe0/0xe0
> [4.449001]  kernel_init+0xe/0x150
> [4.451002]  ret_from_fork+0x27/0x40
> [4.457004] Code: 85 d2 74 27 80 7a 4a 00 74 21 48 89 d0 48 89 c2 f6 80 1b 
> 09 00 00 10 74 07 48 8b 90 a0 0a 00 00 48 8b 52 10 48 83 7a 10 00 75 d0 <0f> 
> b7 50 50 5d 81 e2 f0 00 00 00 83 fa 40 ba 00 00 00 00 48 0f 
> [4.474012] RIP: pci_find_pcie_root_port+0x62/0x80 RSP: a51ec0007ab8
> [4.481004] CR2: 0050
> [4.484001] ---[ end trace 6f9be6a057581199 ]---
> [4.488001] Kernel panic - not syncing: Fatal exception
> [4.494013] Rebooting in 10 seconds..
> [4.494013] ACPI MEMORY or I/O RESET_REG.
> 
>>
>> This local hack seems to fix the issue.
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 
>> af0cc3456dc1b48b1325c06c5edd2ca8cc22a640..cfd8eb5a3d0ba8347d44952ffab28d9c761044d3
>>  100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -522,7 +522,7 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev 
>> *dev)
>> bridge = pci_upstream_bridge(bridge);
>> }
>>  
>> -   if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT)
>> +   if (highest_pcie_bridge && pci_pcie_type(highest_pcie_bridge) != 
>> PCI_EXP_TYPE_ROOT_PORT)
>> return NULL;
>>  
>> return highest_pcie_bridge;
> 

It is very strange that I could not reproduce this problem on my server, which
is a Xeon 2690v3, but it is obviously an issue when the dev cannot find an
upstream bridge in pci_find_pcie_root_port(), so the better way is to do just
what you did in this patch. Thanks.

Regards
Tianhong

> 
> 
> .
> 



[PATCH v11 4/5] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The cxgb4 Ethernet driver now queries its PCIe configuration space to determine
whether it can send TLPs to the Root Complex with the Relaxed Ordering Attribute set.

Remove enable_pcie_relaxed_ordering() so that the probe routine no longer
enables PCIe Capability Device Control[Relaxed Ordering Enable]; this makes sure
the driver will not send Relaxed Ordering TLPs to a Root Complex which
cannot handle them.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be78..09ea62e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -529,6 +529,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa1..33bb867 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev)
dev->name, adap->params.vpd.id, adap->name, buf);
 }
 
-static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
-{
-   pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
-}
-
 /*
  * Free the following resources:
  * - memory used for tables
@@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
pci_enable_pcie_error_reporting(pdev);
-   enable_pcie_relaxed_ordering(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
 
@@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_enabled(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(>stats_lock);
spin_lock_init(>tid_release_lock);
spin_lock_init(>win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ede1220..4ef68f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = >sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.8.3.1




[PATCH v11 3/5] PCI: Disable Relaxed Ordering Attributes for AMD A1100

2017-08-14 Thread Ding Tianhong
Casey reported that the AMD ARM A1100 SoC has a bug in its PCIe
Root Port where Upstream Transaction Layer Packets with the Relaxed
Ordering Attribute clear are allowed to bypass earlier TLPs with
Relaxed Ordering set. This can cause data corruption, so we need
to disable the Relaxed Ordering Attribute for Upstream TLPs sent to the
Root Port.

Reported-and-suggested-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/pci/quirks.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1272f7e..1407604 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4089,6 +4089,22 @@ static void quirk_relaxedordering_disable(struct pci_dev 
*dev)
  quirk_relaxedordering_disable);
 
 /*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
-- 
1.8.3.1




[PATCH v11 5/5] net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The cxgb4vf Ethernet driver now queries its PCIe configuration space to
determine whether it can send TLPs to the Root Complex with the Relaxed
Ordering Attribute set, just like the PF driver does.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c  |  3 +++
 3 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h 
b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 109bc63..08c6ddb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -408,6 +408,7 @@ enum { /* adapter flags */
USING_MSI  = (1UL << 1),
USING_MSIX = (1UL << 2),
QUEUES_BOUND   = (1UL << 3),
+   ROOT_NO_RELAXED_ORDERING = (1UL << 4),
 };
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ac7a150..2b85b87 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 */
adapter->name = pci_name(pdev);
adapter->msg_enable = DFLT_MSG_ENABLE;
+
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_enabled(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
err = adap_init0(adapter);
if (err)
goto err_unmap_bar;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index e37dde2..05498e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
struct port_info *pi = netdev_priv(dev);
struct fw_iq_cmd cmd, rpl;
int ret, iqandst, flsz = 0;
+   int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING);
 
/*
 * If we're using MSI interrupts and we're not initializing the
@@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
cpu_to_be32(
FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) |
FW_IQ_CMD_FL0PACKEN_F |
+   FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+   FW_IQ_CMD_FL0DATARO_V(relaxed) |
FW_IQ_CMD_FL0PADEN_F);
 
/* In T6, for egress queue type FL there is internal overhead
-- 
1.8.3.1




[PATCH v11 2/5] PCI: Disable Relaxed Ordering for some Intel processors

2017-08-14 Thread Ding Tianhong
Section 3.9.1 of the Intel spec says:

3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory
  and Toward MMIO Regions (P2P)

In order to maximize performance for PCIe devices in the processors
listed in Table 3-6 below, the software should determine whether the
accesses are toward coherent memory (system memory) or toward MMIO
regions (P2P access to other devices). If the access is toward MMIO
region, then software can command HW to set the RO bit in the TLP
header, as this would allow hardware to achieve maximum throughput for
these types of accesses. For accesses toward coherent memory, software
can command HW to clear the RO bit in the TLP header (no RO), as this
would allow hardware to achieve maximum throughput for these types of
accesses.

Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing
   PCIe Performance

ProcessorCPU RP Device IDs

Intel Xeon processors based on   6F01H-6F0EH
Broadwell microarchitecture

Intel Xeon processors based on   2F01H-2F0EH
Haswell microarchitecture

This means some Intel processors have performance issues when the Relaxed
Ordering Attribute is used, so disable Relaxed Ordering for these Root Ports.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/quirks.c | 62 
 1 file changed, 62 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 61b59bf..1272f7e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4027,6 +4027,68 @@ static void quirk_relaxedordering_disable(struct pci_dev 
*dev)
 }
 
 /*
+ * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root
+ * Complex has a Flow Control Credit issue which can cause performance
+ * problems with Upstream Transaction Layer Packets with Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_E

[PATCH v11 0/5] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches; I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.

v7: The cxgb4 drivers will enable the PCIe Capability Device Control[Relaxed
Ordering Enable] in PCI Probe() routine, this will break our current
solution for some platform which has problematic when enable the relaxed
ordering attribute. According to the latest recommendations, remove the
enable_pcie_relaxed_ordering(), although it could not cover the Peer-to-Peer
scene, but we agree to leave this problem until we really trigger it.

Make this patch set base on 4.12 release version.

v8: Change the second patch title and description to make it more reasonable,
add the acked-by from Alex and Ashok.

Add a new patch to enable the Relaxed Ordering Attribute for cxgb4vf driver.

Make this patch set base on 4.13-rc2.

v9: The document (https://software.intel.com/sites/default/files/managed/9e/
bc/64-ia-32-architectures-optimization-manual.pdf) indicate that the Xeon
processors based on Broadwell/Haswell microarchitecture has the problem
with Relaxed Ordering Attribute enabled, so add the whole list Device ID
from Intel to the patch.

v10: Significant rework based on Bjorn's feedback, reorganize the first 2 
patches,
 now the Intel and AMD erratum soc has been divided to the different 
patches,
 rename the pcie_relaxed_ordering_supported() to 
pcie_relaxed_ordering_enabled(),
 and no need to check every intervening switch except the root ports, update
 some commits.

v11: An Intel engineer should not have been listed as acking AMD's erratum
 patch; remove the mistaken Acked-by.

Casey Leedom (2):
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
  net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (3):
  PCI: Disable PCIe Relaxed Ordering if unsupported
  PCI: Disable Relaxed Ordering for some Intel processors
  PCI: Disable Relaxed Ordering Attributes for AMD A1100

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c| 23 --
 drivers/net/ethernet/chelsio/cxgb4/sge.c   |  5 +-
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h |  1 +
 .../net/ethernet/chelsio

[PATCH v11 1/5] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-14 Thread Ding Tianhong
Bit 4 of the PCIe Device Control register indicates whether the device is
permitted to use relaxed ordering.
On some platforms using relaxed ordering can have performance issues or
due to erratum can cause data-corruption. In such cases devices must avoid
using relaxed ordering.

The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that
Relaxed Ordering (RO) attribute should not be used for Transaction Layer
Packets (TLP) targeted towards these affected root complexes.

This patch checks if there is any node in the hierarchy that indicates that
using relaxed ordering is not safe. In such cases the patch turns off the
relaxed ordering by clearing the capability for this device.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Ashok Raj <ashok@intel.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/pci/probe.c  | 43 +++
 drivers/pci/quirks.c | 11 +++
 include/linux/pci.h  |  3 +++
 3 files changed, 57 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c31310d..779e646 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1762,6 +1762,48 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable
+ * @dev: PCI device to query
+ *
+ * Returns true if the device has enabled relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   struct pci_dev *root;
+
+   /* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
+   if (dev->is_virtfn)
+   return;
+
+   if (!pcie_relaxed_ordering_enabled(dev))
+   return;
+
+   /*
+* For now, we only deal with Relaxed Ordering issues with Root
+* Ports. Peer-to-Peer DMA is another can of worms.
+*/
+   root = pci_find_pcie_root_port(dev);
+   if (!root)
+   return;
+
+   if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
+   pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+  PCI_EXP_DEVCTL_RELAX_EN);
+   dev_info(&dev->dev, "Disable Relaxed Ordering because the Root 
Port didn't support it\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1769,6 +1811,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6967c6b..61b59bf 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4016,6 +4016,17 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+   dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe 
Completion erratum\n");
+}
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66..29606fb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
@@ -1125,6 +1127,7 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
 
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
-- 
1.8.3.1




Re: [PATCH v10 3/5] PCI: Disable Relaxed Ordering Attributes for AMD A1100

2017-08-14 Thread Ding Tianhong


On 2017/8/15 1:19, Raj, Ashok wrote:
> On Mon, Aug 14, 2017 at 11:44:57PM +0800, Ding Tianhong wrote:
>> Casey reported that the AMD ARM A1100 SoC has a bug in its PCIe
>> Root Port where Upstream Transaction Layer Packets with the Relaxed
>> Ordering Attribute clear are allowed to bypass earlier TLPs with
>> Relaxed Ordering set, it would cause Data Corruption, so we need
>> to disable Relaxed Ordering Attribute when Upstream TLPs to the
>> Root Port.
>>
>> Signed-off-by: Casey Leedom <lee...@chelsio.com>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
>> Acked-by: Ashok Raj <ashok@intel.com>
> 
> I can't ack this patch :-).. must be someone from AMD. Please remove my
> signature from this.
> 

Sorry for funny mistake :)  I will fix it.

Ding

>> ---
>>  drivers/pci/quirks.c | 16 
>>  1 file changed, 16 insertions(+)
>>
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index 1272f7e..1407604 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -4089,6 +4089,22 @@ static void quirk_relaxedordering_disable(struct 
>> pci_dev *dev)
>>quirk_relaxedordering_disable);
>>  
>>  /*
>> + * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
>> + * where Upstream Transaction Layer Packets with the Relaxed Ordering
>> + * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
>> + * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
>> + * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
>> + * November 10, 2010).  As a result, on this platform we can't use Relaxed
>> + * Ordering for Upstream TLPs.
>> + */
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +
>> +/*
>>   * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
>>   * values for the Attribute as were supplied in the header of the
>>   * corresponding Request, except as explicitly allowed when IDO is used."
>> -- 
>> 1.8.3.1
>>
>>
> 
> .
> 



[PATCH v10 3/5] PCI: Disable Relaxed Ordering Attributes for AMD A1100

2017-08-14 Thread Ding Tianhong
Casey reported that the AMD ARM A1100 SoC has a bug in its PCIe
Root Port where Upstream Transaction Layer Packets with the Relaxed
Ordering Attribute clear are allowed to bypass earlier TLPs with
Relaxed Ordering set. This can cause data corruption, so we need
to disable the Relaxed Ordering Attribute for Upstream TLPs sent to
the Root Port.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/quirks.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1272f7e..1407604 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4089,6 +4089,22 @@ static void quirk_relaxedordering_disable(struct pci_dev 
*dev)
  quirk_relaxedordering_disable);
 
 /*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
-- 
1.8.3.1




[PATCH v10 0/5] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches; I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.

v3: Redesigned the logic of pci_configure_relaxed_ordering(). If a PCIe
device does not have the relaxed ordering attribute enabled by default,
we do not touch its PCIe configuration. Otherwise we check whether any
of the devices above us is marked with the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag; if one is, relaxed ordering is
not supported and we update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
a PCIe device, we skip updating relaxed ordering because we are
probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.

v7: The cxgb4 drivers will enable the PCIe Capability Device Control[Relaxed
Ordering Enable] in PCI Probe() routine, this will break our current
solution for some platform which has problematic when enable the relaxed
ordering attribute. According to the latest recommendations, remove the
enable_pcie_relaxed_ordering(), although it could not cover the Peer-to-Peer
scene, but we agree to leave this problem until we really trigger it.

Make this patch set base on 4.12 release version.

v8: Change the second patch title and description to make it more reasonable,
add the acked-by from Alex and Ashok.

Add a new patch to enable the Relaxed Ordering Attribute for cxgb4vf driver.

Make this patch set base on 4.13-rc2.

v9: The document (https://software.intel.com/sites/default/files/managed/9e/
bc/64-ia-32-architectures-optimization-manual.pdf) indicate that the Xeon
processors based on Broadwell/Haswell microarchitecture has the problem
with Relaxed Ordering Attribute enabled, so add the whole list Device ID
from Intel to the patch.

v10: Significant rework based on Bjorn's feedback, reorganize the first 2 
patches,
 now the Intel and AMD erratum soc has been divided to the different 
patches,
 rename the pcie_relaxed_ordering_supported() to 
pcie_relaxed_ordering_enabled(),
 and no need to check every intervening switch except the root ports, update
 some commits.

Casey Leedom (2):
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
  net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (3):
  PCI: Disable PCIe Relaxed Ordering if unsupported
  PCI: Disable Relaxed Ordering for some Intel processors
  PCI: Disable Relaxed Ordering Attributes for AMD A1100

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c| 23 --
 drivers/net/ethernet/chelsio/cxgb4/sge.c   |  5 +-
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h |  1 +
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c| 18 +
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c |  3 +
 drivers

[PATCH v10 2/5] PCI: Disable Relaxed Ordering for some Intel processors

2017-08-14 Thread Ding Tianhong
Section 3.9.1 of the Intel optimization manual says:

3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory
  and Toward MMIO Regions (P2P)

In order to maximize performance for PCIe devices in the processors
listed in Table 3-6 below, the software should determine whether the
accesses are toward coherent memory (system memory) or toward MMIO
regions (P2P access to other devices). If the access is toward MMIO
region, then software can command HW to set the RO bit in the TLP
header, as this would allow hardware to achieve maximum throughput for
these types of accesses. For accesses toward coherent memory, software
can command HW to clear the RO bit in the TLP header (no RO), as this
would allow hardware to achieve maximum throughput for these types of
accesses.

Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing
   PCIe Performance

ProcessorCPU RP Device IDs

Intel Xeon processors based on   6F01H-6F0EH
Broadwell microarchitecture

Intel Xeon processors based on   2F01H-2F0EH
Haswell microarchitecture

This means some Intel processors have performance issues when the Relaxed
Ordering Attribute is used, so disable Relaxed Ordering for these Root Ports.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/quirks.c | 62 
 1 file changed, 62 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 61b59bf..1272f7e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4027,6 +4027,68 @@ static void quirk_relaxedordering_disable(struct pci_dev 
*dev)
 }
 
 /*
+ * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root
+ * Complex has a Flow Control Credit issue which can cause performance
+ * problems with Upstream Transaction Layer Packets with Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_E

[PATCH v10 4/5] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries PCIe configuration space to determine
if it can send TLPs to it with the Relaxed Ordering Attribute set.

Remove enable_pcie_relaxed_ordering() so that the probe routine no longer
sets PCIe Capability Device Control[Relaxed Ordering Enable]; this makes sure
the driver will not send Relaxed Ordering TLPs to a Root Complex which
cannot handle them.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be78..09ea62e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -529,6 +529,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa1..33bb867 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev)
dev->name, adap->params.vpd.id, adap->name, buf);
 }
 
-static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
-{
-   pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
-}
-
 /*
  * Free the following resources:
  * - memory used for tables
@@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
pci_enable_pcie_error_reporting(pdev);
-   enable_pcie_relaxed_ordering(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
 
@@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_enabled(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ede1220..4ef68f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.8.3.1




[PATCH v10 5/5] net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-14 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4vf Ethernet driver now queries PCIe configuration space to
determine if it can send TLPs to it with the Relaxed Ordering
Attribute set, just like the pf did.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c  |  3 +++
 3 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h 
b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 109bc63..08c6ddb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -408,6 +408,7 @@ enum { /* adapter flags */
USING_MSI  = (1UL << 1),
USING_MSIX = (1UL << 2),
QUEUES_BOUND   = (1UL << 3),
+   ROOT_NO_RELAXED_ORDERING = (1UL << 4),
 };
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ac7a150..2b85b87 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 */
adapter->name = pci_name(pdev);
adapter->msg_enable = DFLT_MSG_ENABLE;
+
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_enabled(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
err = adap_init0(adapter);
if (err)
goto err_unmap_bar;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index e37dde2..05498e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
struct port_info *pi = netdev_priv(dev);
struct fw_iq_cmd cmd, rpl;
int ret, iqandst, flsz = 0;
+   int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING);
 
/*
 * If we're using MSI interrupts and we're not initializing the
@@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
cpu_to_be32(
FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) |
FW_IQ_CMD_FL0PACKEN_F |
+   FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+   FW_IQ_CMD_FL0DATARO_V(relaxed) |
FW_IQ_CMD_FL0PADEN_F);
 
/* In T6, for egress queue type FL there is internal overhead
-- 
1.8.3.1




[PATCH v10 1/5] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-14 Thread Ding Tianhong
Bit 4 of the PCIe Device Control register indicates whether the device is
permitted to use relaxed ordering.
On some platforms using relaxed ordering can have performance issues or
due to erratum can cause data-corruption. In such cases devices must avoid
using relaxed ordering.

The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that
Relaxed Ordering (RO) attribute should not be used for Transaction Layer
Packets (TLP) targeted towards these affected root complexes.

This patch checks if there is any node in the hierarchy that indicates that
using relaxed ordering is not safe. In such cases the patch turns off the
relaxed ordering by clearing the capability for this device.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Ashok Raj <ashok@intel.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/pci/probe.c  | 43 +++
 drivers/pci/quirks.c | 11 +++
 include/linux/pci.h  |  3 +++
 3 files changed, 57 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c31310d..779e646 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1762,6 +1762,48 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable
+ * @dev: PCI device to query
+ *
+ * Returns true if the device has enabled relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_enabled(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   struct pci_dev *root;
+
+   /* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
+   if (dev->is_virtfn)
+   return;
+
+   if (!pcie_relaxed_ordering_enabled(dev))
+   return;
+
+   /*
+* For now, we only deal with Relaxed Ordering issues with Root
+* Ports. Peer-to-Peer DMA is another can of worms.
+*/
+   root = pci_find_pcie_root_port(dev);
+   if (!root)
+   return;
+
+   if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
+   pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+  PCI_EXP_DEVCTL_RELAX_EN);
+   dev_info(&dev->dev, "Disable Relaxed Ordering because the Root 
Port didn't support it\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1769,6 +1811,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6967c6b..61b59bf 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4016,6 +4016,17 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+   dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe 
Completion erratum\n");
+}
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66..29606fb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
@@ -1125,6 +1127,7 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
 
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
-- 
1.8.3.1




Re: [PATCH v9 2/4] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-09 Thread Ding Tianhong
On 2017/8/9 11:25, Bjorn Helgaas wrote:
> On Tue, Aug 08, 2017 at 09:22:39PM -0500, Bjorn Helgaas wrote:
>> On Sat, Aug 05, 2017 at 03:15:11PM +0800, Ding Tianhong wrote:
>>> When bit4 is set in the PCIe Device Control register, it indicates

> After looking at the driver, I wonder if it would be simpler like
> this:
> 
>   int pcie_relaxed_ordering_enabled(struct pci_dev *dev)
>   {
> u16 ctl;
> 
> pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl);
> return ctl & PCI_EXP_DEVCTL_RELAX_EN;
>   }
>   EXPORT_SYMBOL(pcie_relaxed_ordering_enabled);
> 
>   static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>   {
> struct pci_dev *root;
> 
> if (dev->is_virtfn)
>   return;  /* PCI_EXP_DEVCTL_RELAX_EN is RsvdP in VFs */
> 
> if (!pcie_relaxed_ordering_enabled(dev))
>   return;
> 
> /*
>  * For now, we only deal with Relaxed Ordering issues with Root
>  * Ports.  Peer-to-peer DMA is another can of worms.
>  */
> root = pci_find_pcie_root_port(dev);
> if (!root)
>   return;
> 
> if (root->relaxed_ordering_broken)
>   pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
>  PCI_EXP_DEVCTL_RELAX_EN);
>   }
> 
> This doesn't check every intervening switch, but I don't think we know
> about any issues except with root ports.
> 

Yes

> And the driver could do:
> 
>   if (!pcie_relaxed_ordering_enabled(pdev))
> adapter->flags |= ROOT_NO_RELAXED_ORDERING;
> 
> The driver code wouldn't show anything about coherent memory vs.
> peer-to-peer, but we really don't have a clue about how to handle that
> yet anyway.
> 
> I guess this is back to exactly what you proposed, except that I
> changed the name of pcie_relaxed_ordering_supported() to
> pcie_relaxed_ordering_enabled(), which I think is slightly more
> specific from the device's point of view.
> 

OK, it looks like we have finally reached a consensus. I will follow your new
suggestion and resend, thanks.

Ding

> Bjorn
> 
> .
> 



Re: [PATCH v9 2/4] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-09 Thread Ding Tianhong
Hi Bjorn:

On 2017/8/9 10:22, Bjorn Helgaas wrote:
> On Sat, Aug 05, 2017 at 03:15:11PM +0800, Ding Tianhong wrote:
>> When bit4 is set in the PCIe Device Control register, it indicates
>> whether the device is permitted to use relaxed ordering.
>> On some platforms using relaxed ordering can have performance issues or
>> due to erratum can cause data-corruption. In such cases devices must avoid
>> using relaxed ordering.
>>
>> This patch checks if there is any node in the hierarchy that indicates that
>> using relaxed ordering is not safe. 
> 
> I think you only check the devices between the root port and the
> target device.  For example, you don't check siblings or cousins of
> the target device.
> 

OK, update the description.

>> In such cases the patch turns off the
>> relaxed ordering by clearing the eapability for this device.
> 
> s/eapability/capability/
> 
>> And if the
>> device is probably running in a guest machine, we should do nothing.
> 
> I don't know what this sentence means.  "Probably running in a guest
> machine" doesn't really make sense, and there's nothing in your patch
> that explicitly checks for being in a guest machine.
> 

Alex noticed that we should do nothing if in the virtual machine because
the Root Complex is NULL at that time, so I think this word should be more
clearly here.

>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
>> Acked-by: Ashok Raj <ashok@intel.com>
>> ---
>>  drivers/pci/pci.c   | 29 +
>>  drivers/pci/probe.c | 37 +
>>  include/linux/pci.h |  2 ++
>>  3 files changed, 68 insertions(+)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index af0cc34..4f9d7c1 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4854,6 +4854,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>  EXPORT_SYMBOL(pcie_set_mps);
>>  
>>  /**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
> 
> Why "If possible"?  The bit is required to be RW or hardwired to zero,
> so PCI_EXP_DEVCTL_RELAX_EN should *always* be zero when this returns.
> 

OK

>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
...
>>
>> ___
>> linux-arm-kernel mailing list
>> linux-arm-ker...@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
> 



Re: [PATCH v9 1/4] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-08-09 Thread Ding Tianhong


On 2017/8/9 11:02, Bjorn Helgaas wrote:
> On Wed, Aug 09, 2017 at 01:40:01AM +, Casey Leedom wrote:
>> | From: Bjorn Helgaas 
>> | Sent: Tuesday, August 8, 2017 4:22 PM
>> | 
>> | This needs to include a link to the Intel spec
>> | 
>> (https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf,
>> | sec 3.9.1).
>>
>>   In the commit message or as a comment?  Regardless, I agree.  It's always
>> nice to be able to go back and see what the official documentation says.
>> However, that said, links on the internet are ... fragile as time goes by,
>> so we might want to simply quote section 3.9.1 in the commit message since
>> it's relatively short:
>>
>> 3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory
>>   and Toward MMIO Regions (P2P)
>>
>> In order to maximize performance for PCIe devices in the processors
>> listed in Table 3-6 below, the soft- ware should determine whether the
>> accesses are toward coherent memory (system memory) or toward MMIO
>> regions (P2P access to other devices). If the access is toward MMIO
>> region, then software can command HW to set the RO bit in the TLP
>> header, as this would allow hardware to achieve maximum throughput for
>> these types of accesses. For accesses toward coherent memory, software
>> can command HW to clear the RO bit in the TLP header (no RO), as this
>> would allow hardware to achieve maximum throughput for these types of
>> accesses.
>>
>> Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing
>>PCIe Performance
>>
>> ProcessorCPU RP Device IDs
>>
>> Intel Xeon processors based on   6F01H-6F0EH
>> Broadwell microarchitecture
>>
>> Intel Xeon processors based on   2F01H-2F0EH
>> Haswell microarchitecture
> 
> Agreed, links are prone to being broken.  I would include in the
> changelog the complete title and order number, along with the link as
> a footnote.  Wouldn't hurt to quote the section too, since it's short.
> 

OK

>> | It should also include a pointer to the AMD erratum, if available, or
>> | at least some reference to how we know it doesn't obey the rules.
>>
>>   Getting an ACK from AMD seems like a forlorn cause at this point.  My
>> contact was Bob Shaw  and he stopped responding to me
>> messages almost a year ago saying that all of AMD's energies were being
>> redirected towards upcoming x86 products (likely Ryzen as we now know).  As
>> far as I can tell AMD has walked away from their A1100 (AKA "Seattle") ARM
>> SoC.
>>
>>   On the specific issue, I can certainly write up something even more
>> extensive than I wrote up for the comment in drivers/pci/quirks.c.  Please
>> review the comment I wrote up and tell me if you'd like something even more
>> detailed -- I'm usually accused of writing comments which are too long, so
>> this would be a new one on me ... :-)
> 
> If you have any bug reports with info about how you debugged it and
> concluded that Seattle is broken, you could include a link (probably
> in the changelog).  But if there isn't anything, there isn't anything.
> 
> I might reorganize those patches as:
> 
>   1) Add a PCI_DEV_FLAGS_RELAXED_ORDERING_BROKEN flag, the quirk that
>   sets it, and the current patch [2/4] that uses it.
> 
>   2) Add the Intel DECLARE_PCI_FIXUP_CLASS_EARLY()s with the Intel
>   details.
> 
>   3) Add the AMD DECLARE_PCI_FIXUP_CLASS_EARLY()s with the AMD
>   details.
> 

OK, I could reorganize it, but still need the Casey to give me the link
for the Seattle, otherwise I could remove the AMD part and wait until
someone show it. Thanks

Ding
> .
> 



Re: [PATCH v9 0/4] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-06 Thread Ding Tianhong


On 2017/8/7 11:47, David Miller wrote:
> From: Ding Tianhong <dingtianh...@huawei.com>
> Date: Sat, 5 Aug 2017 15:15:09 +0800
> 
>> Some devices have problems with Transaction Layer Packets with the Relaxed
>> Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
>> PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
>> devices with Relaxed Ordering issues, and a use of this new flag by the
>> cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
>> Ports.
>>
>> It's been years since I've submitted kernel.org patches, I apologise for the
>> almost certain submission errors.
> 
> Which tree should merge this?  The PCI tree or my networking tree?
> 

Hi David:

I think merging it through the networking tree is the better choice, as it is
mainly used to tell the NIC drivers how to use the Relaxed Ordering Attribute,
and later we need to send a patch to enable RO for the ixgbe driver based on
this patch. But I am not sure whether Bjorn has his own view on this. :)

Hi Bjorn:

Could you help review this patch or give some feedback ?

Thanks
Ding
> .
> 



[PATCH v9 4/4] net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-05 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4vf Ethernet driver now queries PCIe configuration space to
determine if it can send TLPs to it with the Relaxed Ordering
Attribute set, just like the pf did.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c  |  3 +++
 3 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h 
b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 109bc63..08c6ddb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -408,6 +408,7 @@ enum { /* adapter flags */
USING_MSI  = (1UL << 1),
USING_MSIX = (1UL << 2),
QUEUES_BOUND   = (1UL << 3),
+   ROOT_NO_RELAXED_ORDERING = (1UL << 4),
 };
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ac7a150..59e7639 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 */
adapter->name = pci_name(pdev);
adapter->msg_enable = DFLT_MSG_ENABLE;
+
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
err = adap_init0(adapter);
if (err)
goto err_unmap_bar;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index e37dde2..05498e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
struct port_info *pi = netdev_priv(dev);
struct fw_iq_cmd cmd, rpl;
int ret, iqandst, flsz = 0;
+   int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING);
 
/*
 * If we're using MSI interrupts and we're not initializing the
@@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
cpu_to_be32(
FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) |
FW_IQ_CMD_FL0PACKEN_F |
+   FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+   FW_IQ_CMD_FL0DATARO_V(relaxed) |
FW_IQ_CMD_FL0PADEN_F);
 
/* In T6, for egress queue type FL there is internal overhead
-- 
1.8.3.1




[PATCH v9 1/4] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-08-05 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that
Relaxed Ordering (RO) attribute should not be used for Transaction Layer
Packets (TLP) targeted towards these affected root complexes. The current list
of affected parts includes some Intel Xeon processor root complexes, which
suffer from a flow control credit issue that results in performance problems.
On these affected
parts RO can still be used for peer-2-peer traffic. AMD A1100 ARM ("SEATTLE")
Root complexes don't obey PCIe 3.0 ordering rules, hence could lead to
data-corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/quirks.c | 88 
 include/linux/pci.h  |  2 ++
 2 files changed, 90 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6967c6b..5c9e125 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4016,6 +4016,94 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root
+ * Complex has a Flow Control Credit issue which can cause performance
+ * problems with Upstream Transaction Layer Packets with Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f06, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f07, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INT

[PATCH v9 2/4] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-05 Thread Ding Tianhong
When bit4 is set in the PCIe Device Control register, it indicates
whether the device is permitted to use relaxed ordering.
On some platforms using relaxed ordering can have performance issues or
due to erratum can cause data-corruption. In such cases devices must avoid
using relaxed ordering.

This patch checks if there is any node in the hierarchy that indicates that
using relaxed ordering is not safe. In such cases the patch turns off the
relaxed ordering by clearing the eapability for this device. And if the
device is probably running in a guest machine, we should do nothing.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 37 +
 include/linux/pci.h |  2 ++
 3 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc34..4f9d7c1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4854,6 +4854,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device support relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c31310d..48df012 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1762,6 +1762,42 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
+   return true;
+
+   dev = dev->bus->self;
+   }
+
+   return false;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   /* We should not alter the relaxed ordering bit for the VF */
+   if (dev->is_virtfn)
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(&dev->dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1769,6 +1805,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 412ec1c..3aa23a2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1127,6 +1127,8 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
-- 
1.8.3.1




[PATCH v9 0/4] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-05 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches, I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.

v7: The cxgb4 drivers will enable the PCIe Capability Device Control[Relaxed
Ordering Enable] in PCI Probe() routine, this will break our current
solution for some platform which has problematic when enable the relaxed
ordering attribute. According to the latest recommendations, remove the
enable_pcie_relaxed_ordering(), although it could not cover the Peer-to-Peer
scene, but we agree to leave this problem until we really trigger it.

Make this patch set base on 4.12 release version.

v8: Change the second patch title and description to make it more reasonable,
add the acked-by from Alex and Ashok.

Add a new patch to enable the Relaxed Ordering Attribute for cxgb4vf driver.

Make this patch set base on 4.13-rc2.

v9: The document (https://software.intel.com/sites/default/files/managed/9e/
bc/64-ia-32-architectures-optimization-manual.pdf) indicate that the Xeon
processors based on Broadwell/Haswell microarchitecture has the problem
with Relaxed Ordering Attribute enabled, so add the whole list Device ID
from Intel to the patch.

Casey Leedom (3):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
  net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Disable PCIe Relaxed Ordering if unsupported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c| 23 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c   |  5 +--
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h |  1 +
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c| 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c |  3 ++
 drivers/pci/pci.c  | 29 +
 drivers/pci/probe.c| 37 +
 drivers/pci/quirks.c   | 88 ++
 include/linux/pci.h|  4 +++
 10 files changed, 151 insertions(+), 8 deletions(-)

-- 
1.8.3.1




[PATCH v9 3/4] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-05 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries PCIe configuration space to determine
if it can send TLPs to it with the Relaxed Ordering Attribute set.

Remove the enable_pcie_relaxed_ordering() to avoid enable PCIe Capability
Device Control[Relaxed Ordering Enable] at probe routine, to make sure
the driver will not send the Relaxed Ordering TLPs to the Root Complex which
could not deal the Relaxed Ordering TLPs.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be78..09ea62e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -529,6 +529,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa1..391e484 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev)
dev->name, adap->params.vpd.id, adap->name, buf);
 }
 
-static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
-{
-   pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
-}
-
 /*
  * Free the following resources:
  * - memory used for tables
@@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
pci_enable_pcie_error_reporting(pdev);
-   enable_pcie_relaxed_ordering(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
 
@@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ede1220..4ef68f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.8.3.1




Re: [PATCH v8 1/4] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-08-05 Thread Ding Tianhong


On 2017/8/5 5:06, Casey Leedom wrote:
> | From: Ding Tianhong <dingtianh...@huawei.com>
> | Sent: Thursday, August 3, 2017 6:44 AM
> |
> | diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> | index 6967c6b..1e1cdbe 100644
> | --- a/drivers/pci/quirks.c
> | +++ b/drivers/pci/quirks.c
> | @@ -4016,6 +4016,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
> |quirk_tw686x_class);
> |
> |  /*
> | + * Some devices have problems with Transaction Layer Packets with the 
> Relaxed
> | + * Ordering Attribute set.  Such devices should mark themselves and other
> | + * Device Drivers should check before sending TLPs with RO set.
> | + */
> | +static void quirk_relaxedordering_disable(struct pci_dev *dev)
> | +{
> | +   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
> | +}
> | +
> | +/*
> | + * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
> | + * cause performance problems with Upstream Transaction Layer Packets with
> | + * Relaxed Ordering set.
> | + */
> | +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
> PCI_CLASS_NOT_DEFINED, 8,
> | + quirk_relaxedordering_disable);
> | +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
> PCI_CLASS_NOT_DEFINED, 8,
> | + quirk_relaxedordering_disable);
> | +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
> PCI_CLASS_NOT_DEFINED, 8,
> | + quirk_relaxedordering_disable);
> | + ...
> 
> It looks like this is missing the set of Root Complex IDs that were noted in
> the document to which Patrick Cramer sent us a reference:
> 
> https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf
> 
> In section 3.9.1 we have:
> 
> 3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory
>   and Toward MMIO Regions (P2P)
> 
> In order to maximize performance for PCIe devices in the processors
> listed in Table 3-6 below, the software should determine whether the
> accesses are toward coherent memory (system memory) or toward MMIO
> regions (P2P access to other devices). If the access is toward MMIO
> region, then software can command HW to set the RO bit in the TLP
> header, as this would allow hardware to achieve maximum throughput for
> these types of accesses. For accesses toward coherent memory, software
> can command HW to clear the RO bit in the TLP header (no RO), as this
> would allow hardware to achieve maximum throughput for these types of
> accesses.
> 
> Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing
>PCIe Performance
> 
> ProcessorCPU RP Device IDs
> 
> Intel Xeon processors based on   6F01H-6F0EH
> Broadwell microarchitecture
> 
> Intel Xeon processors based on   2F01H-2F0EH
> Haswell microarchitecture
> 
> The PCI Device IDs you have there are the first ones that I guessed at
> having the performance problem with Relaxed Ordering.  We now apparently
> have a complete list from Intel.
> 
> I don't want to phrase this as a "NAK" because you've gone around the
> mulberry bush a bunch of times already.  So maybe just go with what you've
> got in version 8 of your patch and then do a follow on patch to complete the
> table?
> 
Casey:

Thanks for the good catch. I found that Ashok had already noticed this 3 months 
ago; I am so sorry that I
missed it. It was really a long discussion about this problem, but don't worry, it 
is not much work to fix it;
I will send the v9 version. :)

Ding

> Casey
> .
> 



[PATCH v8 2/4] PCI: Disable PCIe Relaxed Ordering if unsupported

2017-08-03 Thread Ding Tianhong
Bit 4 of the PCIe Device Control register indicates
whether the device is permitted to use relaxed ordering.
On some platforms using relaxed ordering can have performance issues or
due to erratum can cause data-corruption. In such cases devices must avoid
using relaxed ordering.

This patch checks if there is any node in the hierarchy that indicates that
using relaxed ordering is not safe. In such cases the patch turns off the
relaxed ordering by clearing the capability for this device. And if the
device is probably running in a guest machine, we should do nothing.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 37 +
 include/linux/pci.h |  2 ++
 3 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc34..4f9d7c1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4854,6 +4854,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device supports relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c31310d..48df012 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1762,6 +1762,42 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
+   return true;
+
+   dev = dev->bus->self;
+   }
+
+   return false;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   /* We should not alter the relaxed ordering bit for the VF */
+   if (dev->is_virtfn)
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(&dev->dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1769,6 +1805,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 412ec1c..3aa23a2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1127,6 +1127,8 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
-- 
1.8.3.1




[PATCH v8 1/4] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-08-03 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that
Relaxed Ordering (RO) attribute should not be used for Transaction Layer
Packets (TLP) targeted towards these affected root complexes. The current list
of affected parts includes the Intel E5-26xx root complex, which suffers from a
flow control credit issue that results in performance problems. On these affected
parts RO can still be used for peer-2-peer traffic. AMD A1100 ARM ("SEATTLE")
Root complexes don't obey PCIe 3.0 ordering rules, hence could lead to
data-corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Acked-by: Ashok Raj <ashok@intel.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6967c6b..1e1cdbe 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4016,6 +4016,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66..412ec1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.8.3.1




[PATCH v8 3/4] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-03 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries PCIe configuration space to determine
if it can send TLPs to it with the Relaxed Ordering Attribute set.

Remove enable_pcie_relaxed_ordering() so that the probe routine no longer
sets PCIe Capability Device Control[Relaxed Ordering Enable]; this makes sure
the driver will not send Relaxed Ordering TLPs to a Root Complex which
cannot handle them.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be78..09ea62e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -529,6 +529,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa1..391e484 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev)
dev->name, adap->params.vpd.id, adap->name, buf);
 }
 
-static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
-{
-   pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
-}
-
 /*
  * Free the following resources:
  * - memory used for tables
@@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
pci_enable_pcie_error_reporting(pdev);
-   enable_pcie_relaxed_ordering(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
 
@@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* sent without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ede1220..4ef68f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.8.3.1




[PATCH v8 0/4] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-03 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches, I apologise for the
almost certain submission errors.

v2: Alexander pointed out that v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.

v7: The cxgb4 drivers will enable the PCIe Capability Device Control[Relaxed
Ordering Enable] in PCI Probe() routine, this will break our current
solution for some platform which has problematic when enable the relaxed
ordering attribute. According to the latest recommendations, remove the
enable_pcie_relaxed_ordering(), although it could not cover the Peer-to-Peer
scene, but we agree to leave this problem until we really trigger it.

Make this patch set base on 4.12 release version.

v8: Modify the change log for first 2 patches to make it more reasonable, and
add the Acked-by from Alex and Ashok.

Add a new patch to enable the Relaxed Ordering Attribute for cxgb4vf driver.

Make this patch set base on 4.13-rc2.

Casey Leedom (3):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
  net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Disable PCIe Relaxed Ordering if unsupported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c| 23 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c   |  5 +--
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h |  1 +
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c| 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c |  3 ++
 drivers/pci/pci.c  | 29 +
 drivers/pci/probe.c| 37 +
 drivers/pci/quirks.c   | 38 ++
 include/linux/pci.h|  4 +++
 10 files changed, 151 insertions(+), 8 deletions(-)

-- 
1.8.3.1




[PATCH v8 4/4] net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-08-03 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4vf Ethernet driver now queries PCIe configuration space to
determine if it can send TLPs to it with the Relaxed Ordering
Attribute set, just like the pf did.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
Reviewed-by: Casey Leedom <lee...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c  |  3 +++
 3 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h 
b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 109bc63..08c6ddb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -408,6 +408,7 @@ enum { /* adapter flags */
USING_MSI  = (1UL << 1),
USING_MSIX = (1UL << 2),
QUEUES_BOUND   = (1UL << 3),
+   ROOT_NO_RELAXED_ORDERING = (1UL << 4),
 };
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ac7a150..59e7639 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 */
adapter->name = pci_name(pdev);
adapter->msg_enable = DFLT_MSG_ENABLE;
+
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* sent without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
err = adap_init0(adapter);
if (err)
goto err_unmap_bar;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index e37dde2..05498e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
struct port_info *pi = netdev_priv(dev);
struct fw_iq_cmd cmd, rpl;
int ret, iqandst, flsz = 0;
+   int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING);
 
/*
 * If we're using MSI interrupts and we're not initializing the
@@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct 
sge_rspq *rspq,
cpu_to_be32(
FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) |
FW_IQ_CMD_FL0PACKEN_F |
+   FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+   FW_IQ_CMD_FL0DATARO_V(relaxed) |
FW_IQ_CMD_FL0PADEN_F);
 
/* In T6, for egress queue type FL there is internal overhead
-- 
1.8.3.1




Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-08-03 Thread Ding Tianhong


On 2017/8/3 17:13, Raj, Ashok wrote:
> Hi Ding
> 
> patch looks good, except would reword the patch description for clarity
> 
> here is my crack at it, feel free to use.
> 
> On Thu, Jul 13, 2017 at 10:21:31PM +0800, Ding Tianhong wrote:
>> The PCIe Device Control Register use the bit 4 to indicate that
>> whether the device is permitted to enable relaxed ordering or not.
>> But relaxed ordering is not safe for some platform which could only
>> use strong write ordering, so devices are allowed (but not required)
>> to enable relaxed ordering bit by default.
>>
>> If a PCIe device didn't enable the relaxed ordering attribute default,
>> we should not do anything in the PCIe configuration, otherwise we
>> should check if any of the devices above us do not support relaxed
>> ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
>> the result if we get a return that indicate that the relaxed ordering
>> is not supported we should update our device to disable relaxed ordering
>> in configuration space. If the device above us doesn't exist or isn't
>> the PCIe device, we shouldn't do anything and skip updating relaxed ordering
>> because we are probably running in a guest machine.
> 
> When bit4 is set in the PCIe Device Control register, it indicates
> whether the device is permitted to use relaxed ordering.
> On some platforms using relaxed ordering can have performance issues or
> due to erratum can cause data-corruption. In such cases devices must avoid
> using relaxed ordering.
> 
> This patch checks if there is any node in the hierarchy that indicates that
> using relaxed ordering is not safe. In such cases the patch turns off the
> relaxed ordering by clearing the capability for this device.
> 

Good, thanks for the commit, I will send v8 and update the patch description.

Ding

>>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/pci.c   | 29 +
>>  drivers/pci/probe.c | 37 +
>>  include/linux/pci.h |  2 ++
>>  3 files changed, 68 insertions(+)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index d88edf5..7a6b32f 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4854,6 +4854,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>  EXPORT_SYMBOL(pcie_set_mps);
>>  
>>  /**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
>> +  PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>> +
>> +/**
>> + * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
>> + * @dev: PCI device to query
>> + *
>> + * Returns true if the device support relaxed ordering attribute.
>> + */
>> +bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
>> +{
>> +u16 v;
>> +
>> +pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>> +
>> +return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
>> +
>> +/**
>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>   * @dev: PCI device to query
>>   * @speed: storage for minimum speed
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index c31310d..48df012 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1762,6 +1762,42 @@ static void pci_configure_extended_tags(struct 
>> pci_dev *dev)
>>   PCI_EXP_DEVCTL_EXT_TAG);
>>  }
>>  
>> +/**
>> + * pci_dev_should_disable_relaxed_ordering - check if the PCI device
>> + * should disable the relaxed ordering attribute.
>> + * @dev: PCI device
>> + *
>> + * Return true if any of the PCI devices above us do not support
>> + * relaxed ordering.
>> + */
>> +static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +while (dev) {
>> +if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
>> +return true;
>> +
>> +dev = dev->bus->self;
>> +}
>> +
>> +return false;
>> +}
>> +
>> +static void pci_configure_relaxed_ordering(s

Re: [PATCH v7 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-08-03 Thread Ding Tianhong


On 2017/8/3 16:55, Raj, Ashok wrote:
> Hi Ding
> 
> Not sure if V7 is the last version.
> 
> can you consider rewording this just to make it a little bit more
> readable? My suggestion below, feel free to use/modify
> 
> Otherwise its all good and you can add my Ack.
> 
> Acked-by: Ashok Raj <ashok@intel.com>
> 
> On Thu, Jul 13, 2017 at 10:21:30PM +0800, Ding Tianhong wrote:
>> From: Casey Leedom <lee...@chelsio.com>
> 

Thanks, Ashok. :)

Regards
Ding

> 
>>
>> The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
>> Ordering Attribute should not be used on Transaction Layer Packets destined
>> for the PCIe End Node so flagged.  Initially flagged this way are Intel
>> E5-26xx Root Complex Ports which suffer from a Flow Control Credit
>> Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
>> don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.
> 
> The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that
> Relaxed Ordering (RO) attribute should not be used for Transaction Layer
> Packets (TLP) targeted towards these affected root complexes. Current list
> of affected parts include Intel E5-26xx root complex which suffers from 
> flow control credits that result in performance issues. On these affected
> parts RO can still be used for peer-2-peer traffic. AMD A1100 ARM ("SEATTLE")
> Root complexes don't obey PCIe 3.0 ordering rules, hence could lead to
> data-corruption.
>>
>> Signed-off-by: Casey Leedom <lee...@chelsio.com>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/quirks.c | 38 ++
>>  include/linux/pci.h  |  2 ++
>>  2 files changed, 40 insertions(+)
>>
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index 6967c6b..1e1cdbe 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -4016,6 +4016,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
>>quirk_tw686x_class);
>>  
>>  /*
>> + * Some devices have problems with Transaction Layer Packets with the 
>> Relaxed
>> + * Ordering Attribute set.  Such devices should mark themselves and other
>> + * Device Drivers should check before sending TLPs with RO set.
>> + */
>> +static void quirk_relaxedordering_disable(struct pci_dev *dev)
>> +{
>> +dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
>> +}
>> +
>> +/*
>> + * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
>> + * cause performance problems with Upstream Transaction Layer Packets with
>> + * Relaxed Ordering set.
>> + */
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +
>> +/*
>> + * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
>> + * where Upstream Transaction Layer Packets with the Relaxed Ordering
>> + * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
>> + * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
>> + * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
>> + * November 10, 2010).  As a result, on this platform we can't use Relaxed
>> + * Ordering for Upstream TLPs.
>> + */
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
>> PCI_CLASS_NOT_DEFINED, 8,
>> +  quirk_relaxedordering_disable);
>> +
>> +/*
>>   * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
>>   * values for the Attribute as were supplied in the header of the
>>   * corresponding Request, except as explicitly allowed when IDO is used."
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 4869e66..412ec1c 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -188,6 +188,8 @@ enum pci_dev_flags {
>>   * the direct_complete optimization.
>>   */
>>  PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
>> +/* Don't use Relaxed Ordering for TLPs directed at this device */
>> +PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
>>  };
>>  
>>  enum pci_irq_reroute_variant {
>> -- 
>> 1.8.3.1
>>
>>
> 
> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-27 Thread Ding Tianhong


On 2017/7/28 1:49, Alexander Duyck wrote:
> On Wed, Jul 26, 2017 at 6:08 PM, Ding Tianhong <dingtianh...@huawei.com> 
> wrote:
>>
>>
>> On 2017/7/27 2:26, Casey Leedom wrote:
>>>   By the way Ding, two issues:
>>>
>>>  1. Did we ever get any acknowledgement from either Intel or AMD
>>> on this patch?  I know that we can't ensure that, but it sure would
>>> be nice since the PCI Quirks that we're putting in affect their
>>> products.
>>>
>>
>> Still no Intel and AMD guys has ack this, this is what I am worried about, 
>> should I
>> ping some man again ?
>>
>> Thanks
>> Ding
> 
> 
> I probably wouldn't worry about it too much. If anything all this
> patch is doing is disabling relaxed ordering on the platforms we know
> have issues based on what Casey originally had. If nothing else we can
> follow up once the patches are in the kernel and if somebody has an
> issue then.
> 
> You can include my acked-by, but it is mostly related to how this
> interacts with NICs, and not so much about the PCI chipsets
> themselves.
> 
> Acked-by: Alexander Duyck <alexander.h.du...@intel.com>
> 

Thanks, Alex. :)

> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-27 Thread Ding Tianhong


On 2017/7/28 2:42, Raj, Ashok wrote:
> Hi Casey
> 
>> | Still no Intel and AMD guys has ack this, this is what I am worried about,
>> | should I ping some man again ?
> 
> 
> I can ack the patch set for Intel specific changes. Now that the doc is made
> public :-).
> 

Good, Thanks. :)

> Can you/Ding resend the patch series, i do have the most recent v7, some
> of the commit message wasn't easy to read. Seems like this patch has
> gotten bigger than originally intended, but seems to be for the overall
> good :-).
> 

OK, I will send v8 patch set and which will update the patch title and add
Casey's new modification for his vf driver, thanks.

Ding

> Sorry for staying silent up until now.
> 
> Cheers,
> Ashok
> 
> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-27 Thread Ding Tianhong


On 2017/7/28 1:44, Casey Leedom wrote:
> | From: Ding Tianhong <dingtianh...@huawei.com>
> | Sent: Wednesday, July 26, 2017 6:01 PM
> |
> | On 2017/7/27 3:05, Casey Leedom wrote:
> | >
> | > Ding, send me a note if you'd like me to work that [cxgb4vf patch] up
> | > for you.
> |
> | Ok, you could send the change log and I could put it in the v8 version
> | together, will you base on the patch 3/3 or build a independence patch?
> 
> Which ever you'd prefer.  It would basically mirror the same exact code that
> you've got for cxgb4.  I.e. testing the setting of the VF's PCIe Capability
> Device Control[Relaxed Ordering Enable], setting a new flag in
> adapter->flags, testing that flag in cxgb4vf/sge.c:t4vf_sge_alloc_rxq().
> But since the VF's PF will already have disabled the PF's Relaxed Ordering
> Enable, the VF will also have it's Relaxed Ordering Enable disabled and any
> effort by the internal chip to send TLPs with the Relaxed Ordering Attribute
> will be gated by the PCIe logic.  So it's not critical that this be in the
> first patch.  Your call.  Let me know if you'd like me to send that to you.
> 

Good, please Send it to me, I will put it together and send the v8 this week,
I think Bjorn will be back next week .:)

> 
> | From: Ding Tianhong <dingtianh...@huawei.com>
> | Sent: Wednesday, July 26, 2017 6:08 PM
> |
> | On 2017/7/27 2:26, Casey Leedom wrote:
> | >
> | >  1. Did we ever get any acknowledgement from either Intel or AMD
> | > on this patch?  I know that we can't ensure that, but it sure would
> | > be nice since the PCI Quirks that we're putting in affect their
> | > products.
> |
> | Still no Intel and AMD guys has ack this, this is what I am worried about,
> | should I ping some man again ?
> 
> By amusing coincidence, Patrik Cramer (now Cc'ed) from Intel sent me a note
> yesterday with a link to the official Intel performance tuning documentation
> which covers this issue:
> 
> https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf
> 
> In section 3.9.1 we have:
> 
> 3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory
>   and Toward MMIO Regions (P2P)
> 
> In order to maximize performance for PCIe devices in the processors
> listed in Table 3-6 below, the software should determine whether the
> accesses are toward coherent memory (system memory) or toward MMIO
> regions (P2P access to other devices). If the access is toward MMIO
> region, then software can command HW to set the RO bit in the TLP
> header, as this would allow hardware to achieve maximum throughput for
> these types of accesses. For accesses toward coherent memory, software
> can command HW to clear the RO bit in the TLP header (no RO), as this
> would allow hardware to achieve maximum throughput for these types of
> accesses.
> 
> Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing
>PCIe Performance
> 
> Processor                        CPU RP Device IDs
> 
> Intel Xeon processors based on   6F01H-6F0EH
> Broadwell microarchitecture
> 
> Intel Xeon processors based on   2F01H-2F0EH
> Haswell microarchitecture
> 
> Unfortunately that's a pretty thin section.  But it does expand the set of
> Intel Root Complexes for which our Linux PCI Quirk will need to cover.  So
> you should add those to the next (and hopefully final) spin of your patch.
> And, it also verifies the need to handle the use of Relaxed Ordering more
> subtly than simply turning it off since the NVMe peer-to-peer example I
> keep bringing up would fall into the "need to use Relaxed Ordering" case ...
> 
> It would have been nice to know why this is happening and if any future
> processor would fix this.  After all, Relaxed Ordering, is just supposed to
> be a hint.  At worst, a receiving device could just ignore the attribute
> entirely.  Obviously someone made an effort to implement it but ... it
> didn't go the way they wanted.
> 
> And, it also would have been nice to know if there was any hidden register
> in these Intel Root Complexes which can completely turn off the effort to
> pay attention to the Relaxed Ordering Attribute.  We've spent an enormous
> amount of effort on this issue here on the Linux PCI email list struggling
> mightily to come up with a way to determine when it's
> safe/recommended/not-recommended/unsafe to use Relaxed Ordering when
> directing TLPs towards the Root Complex.  And some architectures require RO
> for decent performance so we can't just "turn it off" unilaterally.
> 

I am glad to hear that more person were focus on this problem, It would be great
if they could enter our discussion and give us more suggestion. :)

Thanks
Ding

> Casey
> 
> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-26 Thread Ding Tianhong


On 2017/7/27 2:26, Casey Leedom wrote:
>   By the way Ding, two issues:
> 
>  1. Did we ever get any acknowledgement from either Intel or AMD
> on this patch?  I know that we can't ensure that, but it sure would
> be nice since the PCI Quirks that we're putting in affect their
> products.
> 

Still no Intel and AMD guys has ack this, this is what I am worried about, 
should I
ping some man again ?

Thanks
Ding
> 
> Casey
> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-26 Thread Ding Tianhong


On 2017/7/27 3:05, Casey Leedom wrote:
> | From: Alexander Duyck 
> | Sent: Wednesday, July 26, 2017 11:44 AM
> | 
> | On Jul 26, 2017 11:26 AM, "Casey Leedom"  wrote:
> | |
> | | I think that the patch will need to be extended to modify
> | | drivers/pci/iov.c:sriov_enable() to explicitly turn off
> | | Relaxed Ordering Enable if the Root Complex is marked
> | for no RO TLPs.
> | 
> | I'm not sure that would be an issue. Wouldn't most VFs inherit the PF's 
> settings?
> 
> Ah yes, you're right.  This is covered in section 3.5.4 of the Single Root I/O
> Virtualization and Sharing Specification, Revision 1.0 (September 11, 2007),
> governing the PCIe Capability Device Control register.  It states that the VF
> version of that register shall follow the setting of the corresponding PF.
> 
> So we should enhance the cxgb4vf/sge.c:t4vf_sge_alloc_rxq() in the same
> way we did for the cxgb4 driver, but that's not critical since the Relaxed
> Ordering Enable supersedes the internal chip's desire to use the Relaxed
> Ordering Attribute.
> 
> Ding, send me a note if you'd like me to work that up for you.
> 

Ok, you could send the change log and I could put it in the v8 version together,
will you base on the patch 3/3 or build a independence patch?

Ding

> | Also I thought most of the VF configuration space is read only.
> 
> Yes, but not all of it.  And when a VF is exported to a Virtual Machine,
> then the Hypervisor captures and interprets all accesses to the VF's
> PCIe Configuration Space from the VM.
> 
> Thanks again for reminding me of the subtle aspect of the SR_IOV
> specification that I forgot.
> 
> Casey
> .
> 



Re: af_packet: use after free in prb_retire_rx_blk_timer_expired

2017-07-23 Thread Ding Tianhong


On 2017/7/24 9:09, Ding Tianhong wrote:
> 
> 
> On 2017/7/24 1:03, Cong Wang wrote:
>> On Sun, Jul 23, 2017 at 5:48 AM, liujian (CE) <liujia...@huawei.com> wrote:
>>> Hi
>>>
>>> I find it caused by below steps:
>>> 1. set tp_version to TPACKET_V3 and req->tp_block_nr to 1
>>> 2. set tp_block_nr to 0
>>> Then pg_vec was freed, and we did not delete the timer?
>>
>> Thanks for testing!
>>
>> Ah, I overlook the initialization case in my previous patch.
>>
>> How about the following one? Does it cover all the cases?
>>
>>
>> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
>> index 008bb34ee324..0615c2a950fa 100644
>> --- a/net/packet/af_packet.c
>> +++ b/net/packet/af_packet.c
>> @@ -4329,7 +4329,7 @@ static int packet_set_ring(struct sock *sk,
>> union tpacket_req_u *req_u,
>> register_prot_hook(sk);
>> }
>> spin_unlock(&po->bind_lock);
>> -   if (closing && (po->tp_version > TPACKET_V2)) {
>> +   if (pg_vec && (po->tp_version > TPACKET_V2)) {
>> /* Because we don't support block-based V3 on tx-ring */
>> if (!tx_ring)
>> prb_shutdown_retire_blk_timer(po, rb_queue);
>>
>> .
> 
> Hi, Cong:
> 
> It looks like could not cover the case: req->tp_block_nr = 2 -> 
> reg->tp_block_nr = 1 .
> 

Oh, looks like this case would never happen, so I think your solution is ok.

> what about this way:
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -4331,13 +4331,17 @@ static int packet_set_ring(struct sock *sk, union 
> tpacket_req_u *req_u,
> register_prot_hook(sk);
> }
> spin_unlock(&po->bind_lock);
> -   if (closing && (po->tp_version > TPACKET_V2)) {
> +   if ((closing || (pg_vec && !reg->tp_block_nr))&& (po->tp_version > 
> TPACKET_V2)) {
> /* Because we don't support block-based V3 on tx-ring */
> if (!tx_ring)
> prb_shutdown_retire_blk_timer(po, rb_queue);
> 
> 

>>
> 
> 
> .
> 



Re: af_packet: use after free in prb_retire_rx_blk_timer_expired

2017-07-23 Thread Ding Tianhong


On 2017/7/24 1:03, Cong Wang wrote:
> On Sun, Jul 23, 2017 at 5:48 AM, liujian (CE)  wrote:
>> Hi
>>
>> I find it caused by below steps:
>> 1. set tp_version to TPACKET_V3 and req->tp_block_nr to 1
>> 2. set tp_block_nr to 0
>> Then pg_vec was freed, and we did not delete the timer?
> 
> Thanks for testing!
> 
> Ah, I overlook the initialization case in my previous patch.
> 
> How about the following one? Does it cover all the cases?
> 
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 008bb34ee324..0615c2a950fa 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -4329,7 +4329,7 @@ static int packet_set_ring(struct sock *sk,
> union tpacket_req_u *req_u,
> register_prot_hook(sk);
> }
> spin_unlock(&po->bind_lock);
> -   if (closing && (po->tp_version > TPACKET_V2)) {
> +   if (pg_vec && (po->tp_version > TPACKET_V2)) {
> /* Because we don't support block-based V3 on tx-ring */
> if (!tx_ring)
> prb_shutdown_retire_blk_timer(po, rb_queue);
> 
> .

Hi, Cong:

It looks like could not cover the case: req->tp_block_nr = 2 -> 
reg->tp_block_nr = 1 .

what about this way:
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4331,13 +4331,17 @@ static int packet_set_ring(struct sock *sk, union 
tpacket_req_u *req_u,
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
-   if (closing && (po->tp_version > TPACKET_V2)) {
+   if ((closing || (pg_vec && !reg->tp_block_nr))&& (po->tp_version > 
TPACKET_V2)) {
/* Because we don't support block-based V3 on tx-ring */
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);


> 



Re: af_packet: use after free in prb_retire_rx_blk_timer_expired

2017-07-22 Thread Ding Tianhong


On 2017/7/23 3:02, Cong Wang wrote:
> Hello,
> 
> On Sat, Jul 22, 2017 at 2:55 AM, liujian (CE)  wrote:
>> I also hit this issue with trinity test:
>>
>> The call trace:
>>   [exception RIP: prb_retire_rx_blk_timer_expired+70]
>> RIP: 81633be6  RSP: 8801bec03dc0  RFLAGS: 00010246
>> RAX:   RBX: 8801b49d0948  RCX: 
>> RDX: 8801b31057a0  RSI: a56b6b6b6b6b6b6b  RDI: 8801b49d09ec
>> RBP: 8801bec03dd8   R8: 0001   R9: 83e1bf80
>> R10: 0002  R11: 0005  R12: 8801b49d09ec
>> R13: 0100  R14: 81633ba0  R15: 8801b49d0948
>> ORIG_RAX:   CS: 0010  SS: 0018
>>  #7 [8801bec03de0] call_timer_fn at 8108cb76
>>  #8 [8801bec03e18] run_timer_softirq at 8108f87c
>>  #9 [8801bec03e90] __do_softirq at 8108629f
>> #10 [8801bec03f00] call_softirq at 8166a01c
>> #11 [8801bec03f18] do_softirq at 810172ad
>> #12 [8801bec03f30] irq_exit at 81086655
>> #13 [8801bec03f48] msa_irq_exit at 810b1ab3
>> #14 [8801bec03f88] smp_apic_timer_interrupt at 8166aeae
>> #15 [8801bec03fb0] apic_timer_interrupt at 816692dd
>> ---  ---
>>
>> And from vmcore, I can see the pointer GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 
>> is a56b6b6b6b6b6b6b
>>
> 
> Does the following quick fix help?
> 
> 
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 008bb34ee324..09ec1640e5f7 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -4264,6 +4264,7 @@ static int packet_set_ring(struct sock *sk,
> union tpacket_req_u *req_u,
> /* Block transmit is not supported yet */
> if (!tx_ring) {
> init_prb_bdqc(po, rb, pg_vec, req_u);
> +   pg_vec = NULL;
> } else {
> struct tpacket_req3 *req3 = &req_u->req3;
> 

Hi, Cong:

Thanks for your quirk solution, but I still has some doubts about it,
it looks like fix the problem in the packet_setsockopt->packet_set_ring 
processing,
but when in packet_release processing, it may could not release the
real pg_vec for the TPACKET_V3 ring, and then cause the mem leak,
maybe I miss something here, nice to hear from your feedback. :)

what about fix it this way:
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4335,9 +4335,13 @@ static int packet_set_ring(struct sock *sk, union 
tpacket_req_u *req_u,
/* Because we don't support block-based V3 on tx-ring */
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);
+
+   if (pg_vec)
+   free_pg_vec(pg_vec, order, req->tp_block_nr);
+
}

-   if (pg_vec)
+   if (pg_vec && (po->tp_version < TPACKET_V3))
free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
release_sock(sk);


Regards
Ding

> .
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-21 Thread Ding Tianhong
Hi Sinan, Bjorn:

On 2017/7/14 21:54, Sinan Kaya wrote:
> On 7/13/2017 9:26 PM, Ding Tianhong wrote:
>> There is no code to enable the PCIe Relaxed Ordering bit in the 
>> configuration space,
>> it is only be enable by default according to the PCIe Standard 
>> Specification, what we
>> do is to distinguish the RC problematic platform and clear the Relaxed 
>> Ordering bit
>> to tell the PCIe EP don't send any TLPs with Relaxed Ordering Attributes to 
>> the Root
>> Complex.
> 
> Maybe, you should change the patch commit as 
> "Disable PCIe Relaxed Ordering if not supported"...

I agree that to use the new commit title as your suggested, thanks. :)

@Bjorn do you want me to spawn a new patchset with the new commit title
and the Reviewed-by from Casey on the patch 3, or maybe you could pick this
up and modify it own ? thanks.

Ding

> 



Re: [PATCH v7 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-14 Thread Ding Tianhong
Hi Casey, Alexander:

Thanks for the great efforts from both of you, It looks like we have reached a 
consensus finally,
could you please add a confirmation message just like Reviewed-by or something 
else, thanks. :)

Ding

On 2017/7/14 2:44, Casey Leedom wrote:
>   Yeah, I think this works for now.  We'll stumble over what to do when we 
> want to mix upstream TLPs without Relaxed Ordering Attributes directed at 
> problematic Root Complexes, and Peer-to-Peer TLPs with Relaxed Ordering 
> Attributes ... or vice versa depending on which target PCIe Device has issues 
> with Relaxed Ordering.
> 
> 
>   Thanks for all the work!
> 
> 
> Casey
> 
> 



Re: [PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-13 Thread Ding Tianhong


On 2017/7/14 5:09, Sinan Kaya wrote:
> On 7/13/2017 10:21 AM, Ding Tianhong wrote:
>> static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +/* We should not alter the relaxed ordering bit for the VF */
>> +if (dev->is_virtfn)
>> +return;
>> +
>> +/* If the relaxed ordering enable bit is not set, do nothing. */
>> +if (!pcie_relaxed_ordering_supported(dev))
>> +return;
>> +
>> +if (pci_dev_should_disable_relaxed_ordering(dev)) {
>> +pcie_clear_relaxed_ordering(dev);
>> +dev_info(&dev->dev, "Disable Relaxed Ordering\n");
>> +}
>> +}
> 
> I couldn't find anywhere where you actually enable the relaxed ordering
> like the subject suggests.
> 
There is no code to enable the PCIe Relaxed Ordering bit in the configuration 
space,
it is only be enable by default according to the PCIe Standard Specification, 
what we
do is to distinguish the RC problematic platform and clear the Relaxed Ordering 
bit
to tell the PCIe EP don't send any TLPs with Relaxed Ordering Attributes to the 
Root
Complex.

Thanks
Ding



[PATCH v7 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-07-13 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6967c6b..1e1cdbe 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4016,6 +4016,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66..412ec1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.8.3.1




[PATCH v7 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-07-13 Thread Ding Tianhong
The PCIe Device Control Register use the bit 4 to indicate that
whether the device is permitted to enable relaxed ordering or not.
But relaxed ordering is not safe for some platform which could only
use strong write ordering, so devices are allowed (but not required)
to enable relaxed ordering bit by default.

If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest machine.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 37 +
 include/linux/pci.h |  2 ++
 3 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d88edf5..7a6b32f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4854,6 +4854,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device support relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c31310d..48df012 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1762,6 +1762,42 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
+   return true;
+
+   dev = dev->bus->self;
+   }
+
+   return false;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   /* We should not alter the relaxed ordering bit for the VF */
+   if (dev->is_virtfn)
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(&dev->dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1769,6 +1805,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 412ec1c..3aa23a2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1127,6 +1127,8 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 /* PCI Virtual Channel */
 int pci_save_vc_state(struct pci_dev *dev);
-- 
1.8.3.1




[PATCH v7 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-13 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches, I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.  

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.

v7: The cxgb4 drivers will enable the PCIe Capability Device Control[Relaxed
Ordering Enable] in PCI Probe() routine, this will break our current
solution for some platform which has problematic when enable the relaxed
ordering attribute. According to the latest recommendations, remove the
enable_pcie_relaxed_ordering(), although it could not cover the Peer-to-Peer
scene, but we agree to leave this problem until we really trigger it.

Make this patch set base on 4.12 release version.

Casey Leedom (2):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Enable PCIe Relaxed Ordering if supported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
 drivers/pci/pci.c   | 32 +++
 drivers/pci/probe.c | 41 +
 drivers/pci/quirks.c| 38 +++
 include/linux/pci.h |  4 +++
 7 files changed, 136 insertions(+), 2 deletions(-)

-- 
1.9.0




[PATCH v7 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-13 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries PCIe configuration space to determine
if it can send TLPs to it with the Relaxed Ordering Attribute set.

Remove the enable_pcie_relaxed_ordering() to avoid enable PCIe Capability
Device Control[Relaxed Ordering Enable] at probe routine, to make sure
the driver will not send the Relaxed Ordering TLPs to the Root Complex which
could not deal the Relaxed Ordering TLPs.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ef4be78..09ea62e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -529,6 +529,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e403fa1..391e484 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev)
dev->name, adap->params.vpd.id, adap->name, buf);
 }
 
-static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
-{
-   pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
-}
-
 /*
  * Free the following resources:
  * - memory used for tables
@@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
}
 
pci_enable_pcie_error_reporting(pdev);
-   enable_pcie_relaxed_ordering(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
 
@@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* The PCIe devices which under the Root Complexes will be cleared the
+* Relaxed Ordering bit in the configuration space, So we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ede1220..4ef68f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.8.3.1




Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-12 Thread Ding Tianhong


On 2017/7/13 8:52, Casey Leedom wrote:
>   Sorry again for the delay.  This time at least partially caused by a 
> Chelsio-internal Customer Support request to simply disable Relaxed Ordering 
> entirely due to the performance issues with our 100Gb/s product and 
> relatively recent Intel Root Complexes.  Our Customer Support people are 
> tired of asking customers to try turning off Relaxed Ordering. (sigh)
> 
>   So, first off, I've mentioned a couple of times that the current cxgb4 
> driver hardwired the PCIe Capability Device Control[Relaxed Ordering Enable] 
> on.  Here's the code which does it:
> 
> drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:4657:
> 
> static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
> {
> pcie_capability_set_word(dev, PCI_EXP_DEVCTL, 
> PCI_EXP_DEVCTL_RELAX_EN);
> }

we should remove it.

> 
> This is called from the PCI Probe() routine init_one() later in that file.  I 
> just wanted to make sure people knew about this.  Obviously given our current 
> very difficult thread, this would either need to be diked out or changed or a 
> completely different mechanism put in place.
> 
>   Second, just to make sure everyone's on the same page, the above simply 
> allows the device to send TLPs with the Relaxed Ordering Attribute.  It 
> doesn't cause TLPs to suddenly all be sent with RO set.  The use of Relaxed 
> Ordering is selective.  For instance, in our hardware we can configure the RX 
> Path to use RO on Ingress Packet Data delivery to Free List Buffers, but not 
> use RO for delivery of messages noting newly delivered Ingress Packet Data.  
> Doing this allows the destination PCIe target to [potentially] optimize the 
> DMA Writes to it based on local conditions (memory controller channel 
> availability, etc.), but ensure that the message noting newly delivered 
> Ingress Packet Data isn't processed till all of the preceding TLPs with RO 
> set containing Ingress Packet Data have been processed.  (This by the way is 
> the essence of the AMD A1100 ARM SoC bug: its Root Complex isn't obeying that 
> PCIe ordering rule.)
> 
>   Third, as noted above, I'm getting a lot of pressure to get this addressed 
> sooner than later, so I think that we should go with something fairly simple 
> along the lines that you guys are proposing and I'll stop whining about the 
> problem of needing to handle Peer-to-Peer with Relaxed Ordering while not 
> using it for deliveries to the Root Complex.  We can just wait for that 
> kettle of fish to explode on us and deal with the mess then.  (Hhmmm, the 
> mixed metaphor landed in an entirely different place than I originally 
> intended ... :-))
> 

Ok, we could fix them when we trigger this, I think it is not a big problem.

>   If we try to stick as closely to Ding's latest patch set as possible, then 
> we can probably just add the diff to remove the 
> enable_pcie_relaxed_ordering() code in cxgb4_main.c.
> 

If no other more suggestion, I will send a new version and remove the 
enable_pcie_relaxed_ordering(), thanks.  :)

Ding
> Casey
> .
> 



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-12 Thread Ding Tianhong


On 2017/7/11 8:01, Casey Leedom wrote:
> 
> Hey Alexander,
> 
>   Okay, I understand your point regarding the "most likely scenario" being
> TLPs directed upstream to the Root Complex.  But I'd still like to make sure
> that we have an agreed upon API/methodology for doing Peer-to-Peer with
> Relaxed Ordering and no Relaxed Ordering to the Root Complex.  I don't see
> how the proposed APIs can be used in that fashion.
>  
>   Right now the proposed change for cxgb4 is for it to test its own PCIe
> Capability Device Control[Relaxed Ordering Enable] in order to use that
> information to program the Chelsio Hardware to emit/not emit upstream TLPs
> with the Relaxed Ordering Attribute set.  But if we're going to have the
> mixed mode situation I describe, the PCIe Capability Device Control[Relaxed
> Ordering Enable] will have to be set which means that we'll be programming
> the Chelsio Hardware to send upstream TLPs with Relaxed Ordering Enable to
> the Root Complex which is what we were trying to avoid in the first place ...
> 
>   [[ And, as I noted on Friday evening, the current cxgb4 Driver hardwires
>  the Relaxed Ordering Enable on early during device probe, so that
>  would minimally need to be addressed even if we decide that we don't
>  ever want to support mixed mode Relaxed Ordering. ]]
> 
>   We need some method of telling the Chelsio Driver that it should/shouldn't
> use Relaxed Ordering with TLPs directed at the Root Complex.  And the same
> is true for a Peer PCIe Device.
> 
>   It may be that we should approach this from the completely opposite
> direction and instead of having quirks which identify problematic devices,
> have quirks which identify devices which would benefit from the use of
> Relaxed Ordering (if the sending device supports that).  That is, assume the
> using Relaxed Ordering shouldn't be done unless the target device says "I
> love Relaxed Ordering TLPs" ...  In such a world, an NVMe or a Graphics
> device might declare love of Relaxed Ordering and the same for a SPARC Root
> Complex (I think that was your example).
> 
>   By the way, the sole example of Data Corruption with Relaxed Ordering is
> the AMD A1100 ARM SoC and AMD appears to have given up on that almost as
> soon as it was released.  So what we're left with currently is a performance
> problem on modern Intel CPUs ...  (And hopefully we'll get a Technical
> Publication on that issue fairly soon.)
> 
> Casey
> 

Hi Casey:

After the long discuss, I think If the PCIe Capability Device Control[Relaxed 
Ordering
Enable] to be cleared when the platform's RC has some problematic for RO didn't 
break
anything in your driver, I think you could choose to check the
(!pci_dev_should_disable_relaxed_ordering(root)) in the code to enable
ROOT_NO_RELAXED_ORDERING for your adapter, and enable the PCIe Capability 
Device Control
[Relaxed Ordering Enable] bit when you need it, I think we don't have much gap 
here.
And we could leave the Peer-to-Peer situation to be fixed later.

Thanks
Ding

> .
> 



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-10 Thread Ding Tianhong
Hi Casey:

On 2017/7/8 10:04, Casey Leedom wrote:
>   Okay, thanks for the note Alexander.  I'll have to look more closely 
> at
> the patch on Monday and try it out on one of the targeted systems to verify
> the semantics you describe.
> 

All the modification is only clearing the device's Device Control[Relaxed 
Ordering
Enable] bit when distinguish that the platform should not support RO and did 
nothing
to the RC configuration, so I don't think it will break anything compare to the
first version from yours.

>   However, that said, there is no way to tell a priori where a device will
> send TLPs.  To simply assume that all TLPs will be directed towards the Root
> Complex is a big assumption.  Only the device and the code controlling it
> know where the TLPs will be directed.  That's why there are changes required
> in the cxgb4 driver.  For instance, the code in
> drivers/net/ethernet/chelsio/cxgb4/sge.c: t4_sge_alloc_rxq() knows that
> it's allocating Free List Buffers in Host Memory and that the RX Queues that
> it's allocating in the Hardware will eventually send Ingress Data to those
> Free List Buffers.  (And similarly for the Free List Buffer Pointer Queue
> with respect to DMA Reads from the host.)  In that routine we explicitly
> configure the Hardware to use/not-use the Relaxed Ordering Attribute via the
> FW_IQ_CMD_FL0FETCHRO and FW_IQ_CMD_FL0DATARO flags.  Basically we're
> conditionally setting them based on the desirability of sending Relaxed
> Ordering TLPs to the Root Complex.  (And we would perform the same kind of
> check for an nVME application ... which brings us to ...)
> 
>   And what would be the code using these patch APIs to set up a Peer-to-Peer
> nVME-style application?  In that case we'd need the Chelsio adapter's 
> PCIe
> Capability Device Control[Relaxed Ordering Enable] set for the nVME
> application ... and we would avoid programming the Chelsio Hardware to use
> Relaxed Ordering for TLPs directed at the Root Complex.  Thus we would 
> be in
> a position where some TLPs being emitted by the device to Peer devices would
> have Relaxed Ordering set and some directed at the Root Complex would not.
> And the only way for that to work is if the source device's Device
> Control[Relaxed Ordering Enable] is set ...
> 
>   Finally, setting aside my disagreements with the patch, we still have the
> code in the cxgb4 driver which explicitly turns on its own Device
> Control[Relaxed Ordering Enable] in cxgb4_main.c:
> enable_pcie_relaxed_ordering().  So the patch is something of a loop if all
> we're doing is testing our own Relaxed Ordering Enable state ...
>  
> Casey
> 
> .
> 



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-06 Thread Ding Tianhong


On 2017/7/7 1:17, Bjorn Helgaas wrote:
> On Thu, Jul 06, 2017 at 08:58:51PM +0800, Ding Tianhong wrote:
>> Hi Bjorn:
>>
>> Could you please give some feedback about this patchset, it looks like no 
>> more comments for more than a week,
>> thanks. :)
> 
> I was on vacation when you posted it, but don't worry, it's still in
> the queue:
> 
>   https://patchwork.ozlabs.org/project/linux-pci/list
> 
> v4.12 was just released, so it's obviously too late for that.  The
> v4.13 merge window is open, so it's too late for v4.13 as well (we
> need stuff in -next before the merge window).
> 
> There's still plenty of time to work on this for v4.14.
> 

OK, thanks.

> Bjorn
> 
> .
> 



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-07-06 Thread Ding Tianhong
Hi Bjorn:

Could you please give some feedback about this patchset, it looks like no more 
comments for more than a week,
thanks. :)

Ding

On 2017/6/29 13:47, Ding Tianhong wrote:
> ping
> 
> On 2017/6/22 20:15, Ding Tianhong wrote:
>> Some devices have problems with Transaction Layer Packets with the Relaxed
>> Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
>> PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
>> devices with Relaxed Ordering issues, and a use of this new flag by the
>> cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
>> Ports.
>>
>> It's been years since I've submitted kernel.org patches, I apologise for the
>> almost certain submission errors.
>>
>> v2: Alexander point out that the v1 was only a part of the whole solution,
>> some platform which has some issues could use the new flag to indicate
>> that it is not safe to enable relaxed ordering attribute, then we need
>> to clear the relaxed ordering enable bits in the PCI configuration when
>> initializing the device. So add a new second patch to modify the PCI
>> initialization code to clear the relaxed ordering enable bit in the
>> event that the root complex doesn't want relaxed ordering enabled.
>>
>> The third patch was base on the v1's second patch and only be changed
>> to query the relaxed ordering enable bit in the PCI configuration space
>> to allow the Chelsio NIC to send TLPs with the relaxed ordering 
>> attributes
>> set.
>>
>> This version didn't plan to drop the defines for Intel Drivers to use the
>> new checking way to enable relaxed ordering because it is not the hardest
>> part of the moment, we could fix it in next patchset when this patches
>> reach the goal.  
>>
>> v3: Redesigned the logic for pci_configure_relaxed_ordering when 
>> configuration,
>> If a PCIe device didn't enable the relaxed ordering attribute default,
>> we should not do anything in the PCIe configuration, otherwise we
>> should check if any of the devices above us do not support relaxed
>> ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
>> the result if we get a return that indicate that the relaxed ordering
>> is not supported we should update our device to disable relaxed ordering
>> in configuration space. If the device above us doesn't exist or isn't
>> the PCIe device, we shouldn't do anything and skip updating relaxed 
>> ordering
>> because we are probably running in a guest.
>>
>> v4: Rename the functions pcie_get_relaxed_ordering and 
>> pcie_disable_relaxed_ordering
>> according John's suggestion, and modify the description, use the 
>> true/false
>> as the return value.
>>
>> We shouldn't enable relaxed ordering attribute by the setting in the root
>> complex configuration space for PCIe device, so fix it for cxgb4.
>>
>> Fix some format issues.
>>
>> v5: Removed the unnecessary code for some function which only return the bool
>> value, and add the check for VF device.
>>
>> Make this patch set base on 4.12-rc5.
>>
>> v6: Fix the logic error in the need to enable the relaxed ordering attribute 
>> for cxgb4.
>>  
>> Casey Leedom (2):
>>   PCI: Add new PCIe Fabric End Node flag,
>> PCI_DEV_FLAGS_NO_RELAXED_ORDERING
>>   net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
>>
>> Ding Tianhong (1):
>>   PCI: Enable PCIe Relaxed Ordering if supported
>>
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
>>  drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
>>  drivers/pci/pci.c   | 32 +++
>>  drivers/pci/probe.c | 41 
>> +
>>  drivers/pci/quirks.c| 38 +++
>>  include/linux/pci.h |  4 +++
>>  7 files changed, 136 insertions(+), 2 deletions(-)
>>



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-28 Thread Ding Tianhong
ping

On 2017/6/22 20:15, Ding Tianhong wrote:
> Some devices have problems with Transaction Layer Packets with the Relaxed
> Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
> PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
> devices with Relaxed Ordering issues, and a use of this new flag by the
> cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
> Ports.
> 
> It's been years since I've submitted kernel.org patches, I apologise for the
> almost certain submission errors.
> 
> v2: Alexander point out that the v1 was only a part of the whole solution,
> some platform which has some issues could use the new flag to indicate
> that it is not safe to enable relaxed ordering attribute, then we need
> to clear the relaxed ordering enable bits in the PCI configuration when
> initializing the device. So add a new second patch to modify the PCI
> initialization code to clear the relaxed ordering enable bit in the
> event that the root complex doesn't want relaxed ordering enabled.
> 
> The third patch was base on the v1's second patch and only be changed
> to query the relaxed ordering enable bit in the PCI configuration space
> to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
> set.
> 
> This version didn't plan to drop the defines for Intel Drivers to use the
> new checking way to enable relaxed ordering because it is not the hardest
> part of the moment, we could fix it in next patchset when this patches
> reach the goal.  
> 
> v3: Redesigned the logic for pci_configure_relaxed_ordering when 
> configuration,
> If a PCIe device didn't enable the relaxed ordering attribute default,
> we should not do anything in the PCIe configuration, otherwise we
> should check if any of the devices above us do not support relaxed
> ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
> the result if we get a return that indicate that the relaxed ordering
> is not supported we should update our device to disable relaxed ordering
> in configuration space. If the device above us doesn't exist or isn't
> the PCIe device, we shouldn't do anything and skip updating relaxed 
> ordering
> because we are probably running in a guest.
> 
> v4: Rename the functions pcie_get_relaxed_ordering and 
> pcie_disable_relaxed_ordering
> according John's suggestion, and modify the description, use the 
> true/false
> as the return value.
> 
> We shouldn't enable relaxed ordering attribute by the setting in the root
> complex configuration space for PCIe device, so fix it for cxgb4.
> 
> Fix some format issues.
> 
> v5: Removed the unnecessary code for some function which only return the bool
> value, and add the check for VF device.
> 
> Make this patch set base on 4.12-rc5.
> 
> v6: Fix the logic error in the need to enable the relaxed ordering attribute 
> for cxgb4.
>  
> Casey Leedom (2):
>   PCI: Add new PCIe Fabric End Node flag,
> PCI_DEV_FLAGS_NO_RELAXED_ORDERING
>   net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
> 
> Ding Tianhong (1):
>   PCI: Enable PCIe Relaxed Ordering if supported
> 
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
>  drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
>  drivers/pci/pci.c   | 32 +++
>  drivers/pci/probe.c | 41 
> +
>  drivers/pci/quirks.c| 38 +++
>  include/linux/pci.h |  4 +++
>  7 files changed, 136 insertions(+), 2 deletions(-)
> 



[PATCH v6 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-22 Thread Ding Tianhong
The PCIe Device Control Register use the bit 4 to indicate that
whether the device is permitted to enable relaxed ordering or not.
But relaxed ordering is not safe for some platform which could only
use strong write ordering, so devices are allowed (but not required)
to enable relaxed ordering bit by default.

If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest machine.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 37 +
 include/linux/pci.h |  2 ++
 3 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 563901c..c773e0d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4879,6 +4879,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device support relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950..2615299 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1701,6 +1701,42 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
+   return true;
+
+   dev = dev->bus->self;
+   }
+
+   return false;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   /* We should not alter the relaxed ordering bit for the VF */
+   if (dev->is_virtfn)
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(&dev->dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1708,6 +1744,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ce77690..f5f200f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1110,6 +1110,8 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t 
state,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
  bool enable)
-- 
1.9.0




[PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-22 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches, I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.  

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code for some function which only return the bool
value, and add the check for VF device.

Make this patch set base on 4.12-rc5.

v6: Fix the logic error in the need to enable the relaxed ordering attribute 
for cxgb4.
 
Casey Leedom (2):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Enable PCIe Relaxed Ordering if supported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
 drivers/pci/pci.c   | 32 +++
 drivers/pci/probe.c | 41 +
 drivers/pci/quirks.c| 38 +++
 include/linux/pci.h |  4 +++
 7 files changed, 136 insertions(+), 2 deletions(-)

-- 
1.9.0




[PATCH v6 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-06-22 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 085fb78..58bdd23 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3999,6 +3999,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8039f9f..ce77690 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.9.0




[PATCH v6 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-22 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries PCIe configuration space to determine
if it can send TLPs to it with the Relaxed Ordering Attribute set.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..478f25a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -521,6 +521,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index ea1bfcf..0c4a6e9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4735,6 +4735,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* PCIe devices under such Root Complexes will have the Relaxed
+* Ordering bit cleared in their configuration space, so we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (!pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f05f0d4..ac229a3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.9.0




Re: [PATCH v5 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-21 Thread Ding Tianhong


On 2017/6/22 2:28, Alexander Duyck wrote:
> On Sun, Jun 18, 2017 at 11:53 PM, Ding Tianhong <dingtianh...@huawei.com> 
> wrote:
>> From: Casey Leedom <lee...@chelsio.com>
>>
>> cxgb4 Ethernet driver now queries PCIe configuration space to determine
>> if it can send TLPs to it with the Relaxed Ordering Attribute set.
>>
>> Signed-off-by: Casey Leedom <lee...@chelsio.com>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
>>  drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
>>  3 files changed, 21 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
>> b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> index e88c180..478f25a 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> @@ -521,6 +521,7 @@ enum { /* adapter flags 
>> */
>> USING_SOFT_PARAMS  = (1 << 6),
>> MASTER_PF  = (1 << 7),
>> FW_OFLD_CONN   = (1 << 9),
>> +   ROOT_NO_RELAXED_ORDERING = (1 << 10),
>>  };
>>
>>  enum {
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
>> b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> index ea1bfcf..7cd4e1b 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> @@ -4735,6 +4735,23 @@ static int init_one(struct pci_dev *pdev, const 
>> struct pci_device_id *ent)
>> adapter->msg_enable = DFLT_MSG_ENABLE;
>> memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
>>
>> +   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
>> +* Ingress Packet Data to Free List Buffers in order to allow for
>> +* chipset performance optimizations between the Root Complex and
>> +* Memory Controllers.  (Messages to the associated Ingress Queue
>> +* notifying new Packet Placement in the Free Lists Buffers will be
>> +* send without the Relaxed Ordering Attribute thus guaranteeing that
>> +* all preceding PCIe Transaction Layer Packets will be processed
>> +* first.)  But some Root Complexes have various issues with Upstream
>> +* Transaction Layer Packets with the Relaxed Ordering Attribute set.
>> +* The PCIe devices which under the Root Complexes will be cleared 
>> the
>> +* Relaxed Ordering bit in the configuration space, So we check our
>> +* PCIe configuration space to see if it's flagged with advice 
>> against
>> +* using Relaxed Ordering.
>> +*/
>> +   if (pcie_relaxed_ordering_supported(pdev))
>> +   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
>> +
> 
> Looks like you have a typo here. It should be
> "!pcie_relaxed_ordering_supported(pdev)" that you are testing for to
> set this flag shouldn't it? Right now it appears the flag is getting
> set if relaxed ordering is supported.
> 

Yes, I made a mistake here, thanks for correcting.

Ding

>> spin_lock_init(>stats_lock);
>> spin_lock_init(>tid_release_lock);
>> spin_lock_init(>win0_lock);
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
>> b/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> index f05f0d4..ac229a3 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> @@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
>> sge_rspq *iq, bool fwevtq,
>> struct fw_iq_cmd c;
>> struct sge *s = >sge;
>> struct port_info *pi = netdev_priv(dev);
>> +   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
>>
>> /* Size needs to be multiple of 16, including status entry. */
>> iq->size = roundup(iq->size, 16);
>> @@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
>> sge_rspq *iq, bool fwevtq,
>>
>> flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
>> c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
>> -FW_IQ_CMD_FL0FETCHRO_F |
>> -FW_IQ_CMD_FL0DATARO_F |
>> +FW_IQ_CMD_FL0FETCHRO_V(relaxed) 
>> |
>> +FW_IQ_CMD_FL0DATARO_V(relaxed) |
>>  FW_IQ_CMD_FL0PADEN_F);
>> if (cong >= 0)
>> c.iqns_to_fl0congen |=
>> --
>> 1.9.0
>>
>>
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 
> .
> 



[PATCH v5 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-19 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The cxgb4 Ethernet driver now queries its own PCIe configuration space to
determine if it can send TLPs with the Relaxed Ordering Attribute set.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..478f25a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -521,6 +521,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index ea1bfcf..7cd4e1b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4735,6 +4735,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* PCIe devices under such Root Complexes will have the Relaxed
+* Ordering bit cleared in their configuration space, so we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f05f0d4..ac229a3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.9.0




[PATCH v5 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-19 Thread Ding Tianhong
The PCIe Device Control Register uses bit 4 to indicate whether the
device is permitted to use relaxed ordering. But relaxed ordering is
not safe on some platforms, which can only use strong write ordering,
so devices are allowed (but not required) to enable the relaxed
ordering bit by default.

If a PCIe device does not have the relaxed ordering attribute enabled by
default, we should not touch its PCIe configuration. Otherwise, we
should check whether any of the devices above us are marked with the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag; if so, relaxed ordering is not
supported and we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
a PCIe device, we shouldn't do anything and should skip updating relaxed
ordering, because we are probably running in a guest machine.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 37 +
 include/linux/pci.h |  2 ++
 3 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 563901c..c773e0d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4879,6 +4879,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device supports the relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return !!(v & PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950..2615299 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1701,6 +1701,42 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
+   return true;
+
+   dev = dev->bus->self;
+   }
+
+   return false;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   /* We should not alter the relaxed ordering bit for the VF */
+   if (dev->is_virtfn)
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(&dev->dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1708,6 +1744,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ce77690..f5f200f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1110,6 +1110,8 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t 
state,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
  bool enable)
-- 
1.9.0




[PATCH v5 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-19 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches; I apologise for the
almost certain submission errors.

v2: Alexander pointed out that v1 was only part of the whole solution:
platforms with such issues can use the new flag to indicate that it
is not safe to enable the relaxed ordering attribute, but we then need
to clear the relaxed ordering enable bit in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch is based on v1's second patch and was only changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attribute
set.

This version does not yet drop the defines in the Intel drivers in favour
of the new way of checking whether to enable relaxed ordering, because that
is not the hardest part at the moment; we can fix it in a later patch set
once these patches have reached their goal.

v3: Redesigned the logic of pci_configure_relaxed_ordering at configuration time.
If a PCIe device does not have the relaxed ordering attribute enabled by
default, we should not touch its PCIe configuration. Otherwise, we
should check whether any of the devices above us are marked with the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag; if so, relaxed ordering is not
supported and we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
a PCIe device, we shouldn't do anything and should skip updating relaxed
ordering, because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and
pcie_disable_relaxed_ordering
according to John's suggestion, modify the description, and use true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

v5: Removed the unnecessary code from some functions which only return a bool
value, and added the check for VF devices.

Make this patch set base on 4.12-rc5.

Casey Leedom (2):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Enable PCIe Relaxed Ordering if supported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
 drivers/pci/pci.c   | 32 +++
 drivers/pci/probe.c | 41 +
 drivers/pci/quirks.c| 38 +++
 include/linux/pci.h |  4 +++
 7 files changed, 136 insertions(+), 2 deletions(-)

-- 
1.9.0




[PATCH v5 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-06-19 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 085fb78..58bdd23 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3999,6 +3999,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8039f9f..ce77690 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,8 @@ enum pci_dev_flags {
 * the direct_complete optimization.
 */
PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.9.0




Re: [PATCH v4 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-19 Thread Ding Tianhong


On 2017/6/16 22:39, Alexander Duyck wrote:
> On Thu, Jun 15, 2017 at 6:10 PM, Ding Tianhong <dingtianh...@huawei.com> 
> wrote:
>>
>>
>> On 2017/6/13 5:28, Alexander Duyck wrote:
>>> On Mon, Jun 12, 2017 at 4:05 AM, Ding Tianhong <dingtianh...@huawei.com> 
>>> wrote:
>> ...
>>>>  /**
>>>> + * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
>>>> + * @dev: PCI device to query
>>>> + *
>>>> + * If possible clear relaxed ordering
>>>> + */
>>>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
>>>> + PCI_EXP_DEVCTL_RELAX_EN);
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>>>> +
>>>> +/**
>>>> + * pcie_relaxed_ordering_supported - Probe for PCIe relexed ordering 
>>>> support
>>>> + * @dev: PCI device to query
>>>> + *
>>>> + * Returns true if the device support relaxed ordering attribute.
>>>> + */
>>>> +bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
>>>> +{
>>>> +   bool ro_supported = false;
>>>> +   u16 v;
>>>> +
>>>> +   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>>>> +   if ((v & PCI_EXP_DEVCTL_RELAX_EN) >> 4)
>>>> +   ro_supported = true;
>>>
>>> Instead of "return ro_supported" why not just "return !!(v &
>>> PCIE_EXP_DEVCTL_RELAX_EN)"? You can cut out the extra steps and save
>>> yourself some extra steps this way since the shift by 4 shouldn't even
>>> really be needed since you are just testing for a bit anyway.
>>>
>>
>> OK.
>>
>>>> +
>>>> +   return ro_supported;
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
>>>> +
>>>> +/**
>>>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>>>   * @dev: PCI device to query
>>>>   * @speed: storage for minimum speed
>>>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>>>> index 19c8950..ed1f717 100644
>>>> --- a/drivers/pci/probe.c
>>>> +++ b/drivers/pci/probe.c
>>>> @@ -1701,6 +1701,46 @@ static void pci_configure_extended_tags(struct 
>>>> pci_dev *dev)
>>>>  PCI_EXP_DEVCTL_EXT_TAG);
>>>>  }
>>>>
>>>> +/**
>>>> + * pci_dev_should_disable_relaxed_ordering - check if the PCI device
>>>> + * should disable the relaxed ordering attribute.
>>>> + * @dev: PCI device
>>>> + *
>>>> + * Return true if any of the PCI devices above us do not support
>>>> + * relaxed ordering.
>>>> + */
>>>> +static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   bool ro_disabled = false;
>>>> +
>>>> +   while (dev) {
>>>> +   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
>>>> +   ro_disabled = true;
>>>> +   break;
>>>> +   }
>>>> +   dev = dev->bus->self;
>>>> +   }
>>>> +
>>>> +   return ro_disabled;
>>>
>>> Same thing here. I would suggest just returning either true or false,
>>> and drop the ro_disabled value. It will return the lines of code and
>>> make things a bit bit more direct.
>>>
>>
>> OK.
>>
>>>> +}
>>>> +
>>>> +static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   struct pci_dev *bridge = pci_upstream_bridge(dev);
>>>> +
>>>> +   if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
>>>> +   return;
>>>
>>> The pci_is_pcie check is actually redundant based on the
>>> pcie_relaxed_ordering_supported check using pcie_capability_read_word.
>>>
>>
>> Yes, pcie_capability_read_word already check it, thanks.
>>
>>
>>> Also I am not sure what the point is of the pci_upstream_bridge()
>>> check is, it seems like you should be able to catch all the same stuff
>>> in your pci_dev_should_disabl

Re: [PATCH v4 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-15 Thread Ding Tianhong


On 2017/6/13 5:28, Alexander Duyck wrote:
> On Mon, Jun 12, 2017 at 4:05 AM, Ding Tianhong <dingtianh...@huawei.com> 
> wrote:
...
>>  /**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
>> + PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>> +
>> +/**
>> + * pcie_relaxed_ordering_supported - Probe for PCIe relexed ordering support
>> + * @dev: PCI device to query
>> + *
>> + * Returns true if the device support relaxed ordering attribute.
>> + */
>> +bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
>> +{
>> +   bool ro_supported = false;
>> +   u16 v;
>> +
>> +   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>> +   if ((v & PCI_EXP_DEVCTL_RELAX_EN) >> 4)
>> +   ro_supported = true;
> 
> Instead of "return ro_supported" why not just "return !!(v &
> PCIE_EXP_DEVCTL_RELAX_EN)"? You can cut out the extra steps and save
> yourself some extra steps this way since the shift by 4 shouldn't even
> really be needed since you are just testing for a bit anyway.
> 

OK.

>> +
>> +   return ro_supported;
>> +}
>> +EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
>> +
>> +/**
>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>   * @dev: PCI device to query
>>   * @speed: storage for minimum speed
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index 19c8950..ed1f717 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1701,6 +1701,46 @@ static void pci_configure_extended_tags(struct 
>> pci_dev *dev)
>>  PCI_EXP_DEVCTL_EXT_TAG);
>>  }
>>
>> +/**
>> + * pci_dev_should_disable_relaxed_ordering - check if the PCI device
>> + * should disable the relaxed ordering attribute.
>> + * @dev: PCI device
>> + *
>> + * Return true if any of the PCI devices above us do not support
>> + * relaxed ordering.
>> + */
>> +static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   bool ro_disabled = false;
>> +
>> +   while (dev) {
>> +   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
>> +   ro_disabled = true;
>> +   break;
>> +   }
>> +   dev = dev->bus->self;
>> +   }
>> +
>> +   return ro_disabled;
> 
> Same thing here. I would suggest just returning either true or false,
> and drop the ro_disabled value. It will return the lines of code and
> make things a bit bit more direct.
> 

OK.

>> +}
>> +
>> +static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   struct pci_dev *bridge = pci_upstream_bridge(dev);
>> +
>> +   if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
>> +   return;
> 
> The pci_is_pcie check is actually redundant based on the
> pcie_relaxed_ordering_supported check using pcie_capability_read_word.
>

Yes, pcie_capability_read_word already check it, thanks.


> Also I am not sure what the point is of the pci_upstream_bridge()
> check is, it seems like you should be able to catch all the same stuff
> in your pci_dev_should_disable_relaxed_ordering() call. Though it did
> give me a thought. I don't think we can alter this for a VF, so you
> might want to add a check for dev->is_virtfn to the list of checks and
> if it is a virtual function just return since I don't think there are
> any VFs that would let you alter this bit anyway.
> 
If the upstream device is NULL, does that mean the device is in a guest OS?
Maybe I am missing something.
Also, I will check dev->is_virtfn to avoid trying to change the
configuration space for a VF.

Another question: since it looks like Casey may be too busy these
days, should we
delay the modification of cxgb4 and update ixgbe instead? What do
you think about it? :)

Thanks.
Ding

>> +   /* If the releaxed ordering enable bit is not set, do nothing. */
>> +   if (!pcie_relaxed_ordering_supported(dev))
>> +   return;
>> +
>> +   if (pci_dev_should_disable_relaxed_ordering(dev)) {
>> + 

[PATCH v4 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-12 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches; I apologise for the
almost certain submission errors.

v2: Alexander point out that the v1 was only a part of the whole solution,
some platform which has some issues could use the new flag to indicate
that it is not safe to enable relaxed ordering attribute, then we need
to clear the relaxed ordering enable bits in the PCI configuration when
initializing the device. So add a new second patch to modify the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch was base on the v1's second patch and only be changed
to query the relaxed ordering enable bit in the PCI configuration space
to allow the Chelsio NIC to send TLPs with the relaxed ordering attributes
set.

This version didn't plan to drop the defines for Intel Drivers to use the
new checking way to enable relaxed ordering because it is not the hardest
part of the moment, we could fix it in next patchset when this patches
reach the goal.  

v3: Redesigned the logic for pci_configure_relaxed_ordering when configuration,
If a PCIe device didn't enable the relaxed ordering attribute default,
we should not do anything in the PCIe configuration, otherwise we
should check if any of the devices above us do not support relaxed
ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
the result if we get a return that indicate that the relaxed ordering
is not supported we should update our device to disable relaxed ordering
in configuration space. If the device above us doesn't exist or isn't
the PCIe device, we shouldn't do anything and skip updating relaxed ordering
because we are probably running in a guest.

v4: Rename the functions pcie_get_relaxed_ordering and 
pcie_disable_relaxed_ordering
according John's suggestion, and modify the description, use the true/false
as the return value.

We shouldn't enable relaxed ordering attribute by the setting in the root
complex configuration space for PCIe device, so fix it for cxgb4.

Fix some format issues.

Casey Leedom (2):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Enable PCIe Relaxed Ordering if supported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
 drivers/pci/pci.c   | 32 +++
 drivers/pci/probe.c | 41 +
 drivers/pci/quirks.c| 38 +++
 include/linux/pci.h |  4 +++
 7 files changed, 136 insertions(+), 2 deletions(-)

-- 
1.9.0




[PATCH v4 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-12 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The cxgb4 Ethernet driver now queries its own PCIe configuration space to
determine if it can send TLPs with the Relaxed Ordering Attribute set.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..478f25a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -521,6 +521,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38a5c67..1dd093d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4726,6 +4726,23 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* PCIe devices under such Root Complexes will have the Relaxed
+* Ordering bit cleared in their configuration space, so we check our
+* PCIe configuration space to see if it's flagged with advice against
+* using Relaxed Ordering.
+*/
+   if (pcie_relaxed_ordering_supported(pdev))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(>stats_lock);
spin_lock_init(>tid_release_lock);
spin_lock_init(>win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f05f0d4..ac229a3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = >sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.9.0




[PATCH v4 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-12 Thread Ding Tianhong
The PCIe Device Control Register uses bit 4 to indicate whether the
device is permitted to use relaxed ordering. But relaxed ordering is
not safe on some platforms that can only use strong write ordering, so
devices are allowed (but not required) to set the relaxed ordering
enable bit by default.

If a PCIe device doesn't have the relaxed ordering attribute enabled by
default, we should not change anything in its PCIe configuration.
Otherwise, we should check whether any of the devices above us do not
support relaxed ordering, as indicated by the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag. If the result indicates that
relaxed ordering is not supported, we should update our device to disable
relaxed ordering in configuration space. If the device above us doesn't
exist or isn't a PCIe device, we shouldn't do anything and should skip
updating relaxed ordering, because we are probably running in a guest
machine.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 32 
 drivers/pci/probe.c | 41 +
 include/linux/pci.h |  2 ++
 3 files changed, 75 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5b..b44f34c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4878,6 +4878,38 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_relaxed_ordering_supported - Probe for PCIe relaxed ordering support
+ * @dev: PCI device to query
+ *
+ * Returns true if the device supports the relaxed ordering attribute.
+ */
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev)
+{
+   bool ro_supported = false;
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
+   if ((v & PCI_EXP_DEVCTL_RELAX_EN) >> 4)
+   ro_supported = true;
+
+   return ro_supported;
+}
+EXPORT_SYMBOL(pcie_relaxed_ordering_supported);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950..ed1f717 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1701,6 +1701,46 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_should_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */ 
+static bool pci_dev_should_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   bool ro_disabled = false;
+
+   while (dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
+   ro_disabled = true;
+   break;
+   }
+   dev = dev->bus->self;
+   }
+
+   return ro_disabled;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   struct pci_dev *bridge = pci_upstream_bridge(dev);
+
+   if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
+   return;
+
+   /* If the relaxed ordering enable bit is not set, do nothing. */
+   if (!pcie_relaxed_ordering_supported(dev))
+   return;
+
+   if (pci_dev_should_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(>dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1708,6 +1748,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, );
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e1e8428..9870781 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1105,6 +1105,8 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t 
state,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+bool pcie_relaxed_ordering_supported(struct pci_dev *dev);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
  bool enable)
-- 
1.9.0




[PATCH v4 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-06-12 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 085fb78..58bdd23 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3999,6 +3999,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b..e1e8428 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,8 @@ enum pci_dev_flags {
PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
/* Do not use FLR even if device advertises PCI_AF_CAP */
PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.9.0




Re: [PATCH v3 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-12 Thread Ding Tianhong


On 2017/6/8 7:24, Alexander Duyck wrote:
> On Wed, Jun 7, 2017 at 2:16 AM, Ding Tianhong <dingtianh...@huawei.com> wrote:
>> From: Casey Leedom <lee...@chelsio.com>
>>
>> cxgb4 Ethernet driver now queries Root Complex Port to determine if it can
>> send TLPs to it with the Relaxed Ordering Attribute set.
>>
>> Signed-off-by: Casey Leedom <lee...@chelsio.com>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
> 
> So I am pretty sure this patch doesn't work with patch 2. We need to
> update it so that it doesn't check the root complex but instead checks
> itself to see if it is allowed to use relaxed ordering.
> 

Right, we should check the End Point PCIe device configuration space, not RC.

> What we need here is the ability to detect if relaxed ordering is
> disabled, and if so take the steps needed to enable peer to peer
> relaxed ordering without enabling relaxed ordering to the root
> complex. Do I have that right Casey?
> 

I am not very clear on how this driver could enable peer-to-peer
relaxed ordering without enabling relaxed ordering to the RC, and I need
some help from Casey. So I will keep this patch focused on fixing only
the peer-to-RC relaxed ordering problem; I hope Casey can send another
patch to fix the rest later.

Thanks
Ding

>> ---
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
>>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
>>  drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
>>  3 files changed, 21 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
>> b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> index e88c180..478f25a 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
>> @@ -521,6 +521,7 @@ enum { /* adapter flags 
>> */
>> USING_SOFT_PARAMS  = (1 << 6),
>> MASTER_PF  = (1 << 7),
>> FW_OFLD_CONN   = (1 << 9),
>> +   ROOT_NO_RELAXED_ORDERING = (1 << 10),
>>  };
>>
>>  enum {
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
>> b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> index 38a5c67..fbfe341 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
>> @@ -4628,6 +4628,7 @@ static int init_one(struct pci_dev *pdev, const struct 
>> pci_device_id *ent)
>>  #ifdef CONFIG_PCI_IOV
>> u32 v, port_vec;
>>  #endif
>> +   struct pci_dev *root;
>>
>> printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
>>
>> @@ -4726,6 +4727,22 @@ static int init_one(struct pci_dev *pdev, const 
>> struct pci_device_id *ent)
>> adapter->msg_enable = DFLT_MSG_ENABLE;
>> memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
>>
>> +   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
>> +* Ingress Packet Data to Free List Buffers in order to allow for
>> +* chipset performance optimizations between the Root Complex and
>> +* Memory Controllers.  (Messages to the associated Ingress Queue
>> +* notifying new Packet Placement in the Free Lists Buffers will be
>> +* send without the Relaxed Ordering Attribute thus guaranteing that
>> +* all preceding PCIe Transaction Layer Packets will be processed
>> +* first.)  But some Root Complexes have various issues with Upstream
>> +* Transaction Layer Packets with the Relaxed Ordering Attribute set.
>> +* So we check our Root Complex to see if it's flaged with advice
>> +* against using Relaxed Ordering.
>> +*/
>> +   root = pci_find_pcie_root_port(adapter->pdev);
>> +   if (pcie_get_relaxed_ordering(root))
>> +   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
>> +
>> spin_lock_init(>stats_lock);
>> spin_lock_init(>tid_release_lock);
>> spin_lock_init(>win0_lock);
>> diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
>> b/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> index f05f0d4..ac229a3 100644
>> --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
>> @@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
>> sge_rspq *iq, bool fwevtq,
>> struct fw_iq_cmd c;
>> struct sge *s = >sge;
>>  

Re: [PATCH v3 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-09 Thread Ding Tianhong

Hi John:

Thanks for the reviewing, I will fix it in next version.

Ding

On 2017/6/8 1:55, John Garry wrote:
> On 07/06/2017 10:16, Ding Tianhong wrote:
> 
> Hi Ding,
> 
> A few general style comments:
> 
>> The PCIe Device Control Register use the bit 4 to indicate that
>> whether the device is permitted to enable relaxed ordering or not.
>> But relaxed ordering is not safe for some platform which could only
>> use strong write ordering, so devices are allowed (but not required)
>> to enable relaxed ordering bit by default.
>>
>> If a PCIe device didn't enable the relaxed ordering attribute default,
>> we should not do anything in the PCIe configuration, otherwise we
>> should check if any of the devices above us do not support relaxed
>> ordering by the PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, then base on
>> the result if we get a return that indicate that the relaxed ordering
>> is not supported we should update our device to disable relaxed ordering
>> in configuration space. If the device above us doesn't exist or isn't
>> the PCIe device, we shouldn't do anything and skip updating relaxed ordering
>> because we are probably running in a guest.
> 
> A guest machine/environment
> 
>>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/pci.c   | 29 +
>>  drivers/pci/probe.c | 43 +++
>>  include/linux/pci.h |  2 ++
>>  3 files changed, 74 insertions(+)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b01bd5b..3d42b38 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4878,6 +4878,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>  EXPORT_SYMBOL(pcie_set_mps);
>>
>>  /**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
>> +  PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>> +
>> +/**
>> + * pcie_get_relaxed_ordering - check PCI Express relexed ordering bit
> 
> s/relexed/relaxed/
> 
> Check what on relaxed ordering bit?
> 
> And the function name is inconsistent with this discription.
> 
>> + * @dev: PCI device to query
>> + *
>> + * Returns true if relaxed ordering is been set
> 
> If you want to return true/false, then use !!, below in the function
> 
>> + */
>> +int pcie_get_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +u16 v;
>> +
>> +pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>> +
>> +return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
>> +}
>> +EXPORT_SYMBOL(pcie_get_relaxed_ordering);
>> +
>> +/**
>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>   * @dev: PCI device to query
>>   * @speed: storage for minimum speed
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index 19c8950..0c94c80 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1701,6 +1701,48 @@ static void pci_configure_extended_tags(struct 
>> pci_dev *dev)
>>   PCI_EXP_DEVCTL_EXT_TAG);
>>  }
>>
>> +/**
>> + * pci_dev_disable_relaxed_ordering - check if the PCI device
>> + * should disable the relaxed ordering attribute.
> 
> I think that we need a more accurate description. I know some people think a 
> function which just "checks" is vague.
> 
>> + * @dev: PCI device
>> + *
>> + * Return true if any of the PCI devices above us do not support
>> + * relaxed ordering.
>> + */
>> +static int pci_dev_disable_relaxed_ordering(struct pci_dev *dev)
> 
> The function name implies an action - disabling - but this function does 
> nothing except return a value
> 
>> +{
>> +int ro_disabled = 0;
>> +
>> +while(dev) {
> 
> Did you run checkpatch?
> 
>> +if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
>> +ro_disabled = 1;
> 
> just return true, and return false at the bottom, so you can do away with 
> ro_disabled (which is not a bool)
> 
>> +break;
>> +}
>> +dev = dev->bus->self;
>> +}
>> +
>> +return ro_disabled;
>> +}
>> +
>> +static void pci_confi

[PATCH v3 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-07 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries Root Complex Port to determine if it can
send TLPs to it with the Relaxed Ordering Attribute set.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..478f25a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -521,6 +521,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38a5c67..fbfe341 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4628,6 +4628,7 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
 #ifdef CONFIG_PCI_IOV
u32 v, port_vec;
 #endif
+   struct pci_dev *root;
 
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4726,6 +4727,22 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* send without the Relaxed Ordering Attribute thus guaranteing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* So we check our Root Complex to see if it's flaged with advice
+* against using Relaxed Ordering.
+*/
+   root = pci_find_pcie_root_port(adapter->pdev);
+   if (pcie_get_relaxed_ordering(root))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(>stats_lock);
spin_lock_init(>tid_release_lock);
spin_lock_init(>win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f05f0d4..ac229a3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = >sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.9.0




[PATCH v3 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-07 Thread Ding Tianhong
The PCIe Device Control Register uses bit 4 to indicate whether the
device is permitted to use relaxed ordering. But relaxed ordering is
not safe on some platforms that can only use strong write ordering, so
devices are allowed (but not required) to set the relaxed ordering
enable bit by default.

If a PCIe device doesn't have the relaxed ordering attribute enabled by
default, we should not change anything in its PCIe configuration.
Otherwise, we should check whether any of the devices above us do not
support relaxed ordering, as indicated by the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag. If the result indicates that
relaxed ordering is not supported, we should update our device to disable
relaxed ordering in configuration space. If the device above us doesn't
exist or isn't a PCIe device, we shouldn't do anything and should skip
updating relaxed ordering, because we are probably running in a guest.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 43 +++
 include/linux/pci.h |  2 ++
 3 files changed, 74 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5b..3d42b38 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4878,6 +4878,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_clear_relaxed_ordering - clear PCI Express relexed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_get_relaxed_ordering - check PCI Express relexed ordering bit
+ * @dev: PCI device to query
+ *
+ * Returns true if relaxed ordering is been set
+ */
+int pcie_get_relaxed_ordering(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
+
+   return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
+}
+EXPORT_SYMBOL(pcie_get_relaxed_ordering);
+
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950..0c94c80 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1701,6 +1701,48 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+/**
+ * pci_dev_disable_relaxed_ordering - check if the PCI device
+ * should disable the relaxed ordering attribute.
+ * @dev: PCI device
+ *
+ * Return true if any of the PCI devices above us do not support
+ * relaxed ordering.
+ */ 
+static int pci_dev_disable_relaxed_ordering(struct pci_dev *dev)
+{
+   int ro_disabled = 0;
+
+   while(dev) {
+   if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) {
+   ro_disabled = 1;
+   break;
+   }
+   dev = dev->bus->self;
+   }
+
+   return ro_disabled;
+}
+
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   struct pci_dev *bridge = pci_upstream_bridge(dev);
+   int origin_ero;
+
+   if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
+   return;
+
+   origin_ero = pcie_get_relaxed_ordering(dev);
+   /* If the releaxed ordering enable bit is not set, do nothing. */
+   if (!origin_ero)
+   return;
+
+   if (pci_dev_disable_relaxed_ordering(dev)) {
+   pcie_clear_relaxed_ordering(dev);
+   dev_info(>dev, "Disable Relaxed Ordering\n");
+   }
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1708,6 +1750,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, );
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e1e8428..299d2f3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1105,6 +1105,8 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t 
state,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+int pcie_get_relaxed_ordering(struct pci_dev *dev);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
  bool enable)
-- 
1.9.0




[PATCH v3 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-07 Thread Ding Tianhong
Some devices have problems with Transaction Layer Packets with the Relaxed
Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
devices with Relaxed Ordering issues, and a use of this new flag by the
cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
Ports.

It's been years since I've submitted kernel.org patches, so I apologise for
the almost certain submission errors.

v2: Alexander pointed out that v1 was only part of the whole solution:
a platform with known issues can use the new flag to indicate that it is
not safe to enable the relaxed ordering attribute, and we then need to
clear the relaxed ordering enable bit in the PCI configuration space when
initializing the device. So add a new second patch that modifies the PCI
initialization code to clear the relaxed ordering enable bit in the
event that the root complex doesn't want relaxed ordering enabled.

The third patch is based on v1's second patch and is only changed to
query the relaxed ordering enable bit in the PCI configuration space in
order to allow the Chelsio NIC to send TLPs with the relaxed ordering
attribute set.

This version doesn't drop the defines in the Intel drivers in favor of the
new way of checking whether to enable relaxed ordering, because that is not
the hardest part at the moment; we can fix it in a follow-up patchset once
these patches are accepted.

v3: Redesigned the logic of pci_configure_relaxed_ordering.
If a PCIe device doesn't have the relaxed ordering attribute enabled by
default, we should not change anything in its PCIe configuration.
Otherwise, we should check whether any of the devices above us do not
support relaxed ordering, as indicated by the
PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag. If the result indicates that
relaxed ordering is not supported, we should update our device to disable
relaxed ordering in configuration space. If the device above us doesn't
exist or isn't a PCIe device, we shouldn't do anything and should skip
updating relaxed ordering, because we are probably running in a guest.

Casey Leedom (2):
  PCI: Add new PCIe Fabric End Node flag,
PCI_DEV_FLAGS_NO_RELAXED_ORDERING
  net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

Ding Tianhong (1):
  PCI: Enable PCIe Relaxed Ordering if supported

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
 drivers/pci/pci.c   | 29 +
 drivers/pci/probe.c | 43 +
 drivers/pci/quirks.c| 38 ++
 include/linux/pci.h |  4 +++
 7 files changed, 135 insertions(+), 2 deletions(-)

-- 
1.9.0




[PATCH v3 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-06-07 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 085fb78..58bdd23 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3999,6 +3999,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b..e1e8428 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,8 @@ enum pci_dev_flags {
PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
/* Do not use FLR even if device advertises PCI_AF_CAP */
PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.9.0




Re: [PATCH v2 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-06 Thread Ding Tianhong


On 2017/6/6 8:28, Alexander Duyck wrote:
> On Mon, Jun 5, 2017 at 6:33 AM, Ding Tianhong <dingtianh...@huawei.com> wrote:
>>
>>
>> On 2017/6/4 2:19, Alexander Duyck wrote:
>>> On Fri, Jun 2, 2017 at 9:04 PM, Ding Tianhong <dingtianh...@huawei.com> 
>>> wrote:
>>>> The PCIe Device Control Register use the bit 4 to indicate that
>>>> whether the device is permitted to enable relaxed ordering or not.
>>>> But relaxed ordering is not safe for some platform which could only
>>>> use strong write ordering, so devices are allowed (but not required)
>>>> to enable relaxed ordering bit by default.
>>>>
>>>> If a platform support relaxed ordering but does not enable it by
>>>> default, enable it in the PCIe configuration. This allows some device
>>>> to send TLPs with the relaxed ordering attributes set, which may
>>>> improve the performance.
>>>>
>>>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>>>> ---
>>>>  drivers/pci/pci.c   | 42 ++
>>>>  drivers/pci/probe.c | 11 +++
>>>>  include/linux/pci.h |  3 +++
>>>>  3 files changed, 56 insertions(+)
>>>>
>>>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>>> index b01bd5b..f57a374 100644
>>>> --- a/drivers/pci/pci.c
>>>> +++ b/drivers/pci/pci.c
>>>> @@ -4878,6 +4878,48 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>>>  EXPORT_SYMBOL(pcie_set_mps);
>>>>
>>>>  /**
>>>> + * pcie_set_relaxed_ordering - set PCI Express relexed ordering bit
>>>> + * @dev: PCI device to query
>>>> + *
>>>> + * If possible sets relaxed ordering
>>>> + */
>>>> +int pcie_set_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, 
>>>> PCI_EXP_DEVCTL_RELAX_EN);
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_set_relaxed_ordering);
>>>> +
>>>> +/**
>>>> + * pcie_clear_relaxed_ordering - clear PCI Express relexed ordering bit
>>>> + * @dev: PCI device to query
>>>> + *
>>>> + * If possible clear relaxed ordering
>>>> + */
>>>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, 
>>>> PCI_EXP_DEVCTL_RELAX_EN);
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>>>> +
>>>> +/**
>>>> + * pcie_get_relaxed_ordering - check PCI Express relexed ordering bit
>>>> + * @dev: PCI device to query
>>>> + *
>>>> + * Returns true if relaxed ordering is been set
>>>> + */
>>>> +int pcie_get_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   u16 v;
>>>> +
>>>> +   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>>>> +
>>>> +   return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_get_relaxed_ordering);
>>>> +
>>>> +/**
>>>> + * pcie_set_mps - set PCI Express maximum payload size
>>>> +/**
>>>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>>>   * @dev: PCI device to query
>>>>   * @speed: storage for minimum speed
>>>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>>>> index 19c8950..aeb22b5 100644
>>>> --- a/drivers/pci/probe.c
>>>> +++ b/drivers/pci/probe.c
>>>> @@ -1701,6 +1701,16 @@ static void pci_configure_extended_tags(struct 
>>>> pci_dev *dev)
>>>>  PCI_EXP_DEVCTL_EXT_TAG);
>>>>  }
>>>>
>>>> +static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>>>> +{
>>>> +   int ret;
>>>> +
>>>> +   if (dev && (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING))
>>>
>>> So there is a minor issue here. The problem is this is only trying to
>>> modify relaxed ordering for the device itself. That isn't what we
>>> want. What we want is to modify it on all of the upstream port
>>> interfaces where there is something the path to the root complex that
>>> has an issue. So if the root complex has to set the
>>> NO_RELAXED_OR

Re: [PATCH v2 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-05 Thread Ding Tianhong


On 2017/6/4 2:19, Alexander Duyck wrote:
> On Fri, Jun 2, 2017 at 9:04 PM, Ding Tianhong <dingtianh...@huawei.com> wrote:
>> The PCIe Device Control Register use the bit 4 to indicate that
>> whether the device is permitted to enable relaxed ordering or not.
>> But relaxed ordering is not safe for some platform which could only
>> use strong write ordering, so devices are allowed (but not required)
>> to enable relaxed ordering bit by default.
>>
>> If a platform support relaxed ordering but does not enable it by
>> default, enable it in the PCIe configuration. This allows some device
>> to send TLPs with the relaxed ordering attributes set, which may
>> improve the performance.
>>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/pci.c   | 42 ++
>>  drivers/pci/probe.c | 11 +++
>>  include/linux/pci.h |  3 +++
>>  3 files changed, 56 insertions(+)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b01bd5b..f57a374 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4878,6 +4878,48 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>  EXPORT_SYMBOL(pcie_set_mps);
>>
>>  /**
>> + * pcie_set_relaxed_ordering - set PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible sets relaxed ordering
>> + */
>> +int pcie_set_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, 
>> PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_set_relaxed_ordering);
>> +
>> +/**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, 
>> PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>> +
>> +/**
>> + * pcie_get_relaxed_ordering - check PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * Returns true if relaxed ordering is been set
>> + */
>> +int pcie_get_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   u16 v;
>> +
>> +   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>> +
>> +   return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
>> +}
>> +EXPORT_SYMBOL(pcie_get_relaxed_ordering);
>> +
>> +/**
>> + * pcie_set_mps - set PCI Express maximum payload size
>> +/**
>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>   * @dev: PCI device to query
>>   * @speed: storage for minimum speed
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index 19c8950..aeb22b5 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1701,6 +1701,16 @@ static void pci_configure_extended_tags(struct 
>> pci_dev *dev)
>>  PCI_EXP_DEVCTL_EXT_TAG);
>>  }
>>
>> +static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   int ret;
>> +
>> +   if (dev && (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING))
> 
> So there is a minor issue here. The problem is this is only trying to
> modify relaxed ordering for the device itself. That isn't what we
> want. What we want is to modify it on all of the upstream port
> interfaces where there is something the path to the root complex that
> has an issue. So if the root complex has to set the
> NO_RELAXED_ORDERING flag on a root port, all of the interfaces below
> it that would be pushing traffic toward it should not have the relaxed
> ordering bit set.
> 
> Also I am pretty sure this is a PCIe capability, not a PCI capability.
> You probably need to make sure you code is making this distinction
> which I don't know if it currently is. If you need an example of the
> kind of checks I am suggesting just take a look at
> pcie_configure_mps(). It is verifying the function is PCIe before
> attempting to make any updates. In your case you will probably also
> need to make sure there is a bus for you to walk up the chain of.
> Otherwise this shouldn't apply.
> 
> 
>> +   pcie_set_relaxed_ordering(dev);
>> +   else
>> +   pcie_clear_relaxed_ordering(dev);
>> +}
> 
> Also I am not a fan of the way this is handled currently. If you don't

Re: [PATCH v2 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-03 Thread Ding Tianhong


On 2017/6/4 2:19, Alexander Duyck wrote:
> On Fri, Jun 2, 2017 at 9:04 PM, Ding Tianhong <dingtianh...@huawei.com> wrote:
>> The PCIe Device Control Register use the bit 4 to indicate that
>> whether the device is permitted to enable relaxed ordering or not.
>> But relaxed ordering is not safe for some platform which could only
>> use strong write ordering, so devices are allowed (but not required)
>> to enable relaxed ordering bit by default.
>>
>> If a platform support relaxed ordering but does not enable it by
>> default, enable it in the PCIe configuration. This allows some device
>> to send TLPs with the relaxed ordering attributes set, which may
>> improve the performance.
>>
>> Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
>> ---
>>  drivers/pci/pci.c   | 42 ++
>>  drivers/pci/probe.c | 11 +++
>>  include/linux/pci.h |  3 +++
>>  3 files changed, 56 insertions(+)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b01bd5b..f57a374 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4878,6 +4878,48 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>  EXPORT_SYMBOL(pcie_set_mps);
>>
>>  /**
>> + * pcie_set_relaxed_ordering - set PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible sets relaxed ordering
>> + */
>> +int pcie_set_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, 
>> PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_set_relaxed_ordering);
>> +
>> +/**
>> + * pcie_clear_relaxed_ordering - clear PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * If possible clear relaxed ordering
>> + */
>> +int pcie_clear_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, 
>> PCI_EXP_DEVCTL_RELAX_EN);
>> +}
>> +EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
>> +
>> +/**
>> + * pcie_get_relaxed_ordering - check PCI Express relexed ordering bit
>> + * @dev: PCI device to query
>> + *
>> + * Returns true if relaxed ordering is been set
>> + */
>> +int pcie_get_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   u16 v;
>> +
>> +   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, );
>> +
>> +   return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
>> +}
>> +EXPORT_SYMBOL(pcie_get_relaxed_ordering);
>> +
>> +/**
>> + * pcie_set_mps - set PCI Express maximum payload size
>> +/**
>>   * pcie_get_minimum_link - determine minimum link settings of a PCI device
>>   * @dev: PCI device to query
>>   * @speed: storage for minimum speed
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index 19c8950..aeb22b5 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1701,6 +1701,16 @@ static void pci_configure_extended_tags(struct 
>> pci_dev *dev)
>>  PCI_EXP_DEVCTL_EXT_TAG);
>>  }
>>
>> +static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>> +{
>> +   int ret;
>> +
>> +   if (dev && (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING))
> 
> So there is a minor issue here. The problem is this is only trying to
> modify relaxed ordering for the device itself. That isn't what we
> want. What we want is to modify it on all of the upstream port
> interfaces where there is something the path to the root complex that
> has an issue. So if the root complex has to set the
> NO_RELAXED_ORDERING flag on a root port, all of the interfaces below
> it that would be pushing traffic toward it should not have the relaxed
> ordering bit set.
> 
> Also I am pretty sure this is a PCIe capability, not a PCI capability.
> You probably need to make sure you code is making this distinction
> which I don't know if it currently is. If you need an example of the
> kind of checks I am suggesting just take a look at
> pcie_configure_mps(). It is verifying the function is PCIe before
> attempting to make any updates. In your case you will probably also
> need to make sure there is a bus for you to walk up the chain of.
> Otherwise this shouldn't apply.
> 

Yes, I missed the upstream ports and the PCIe/PCI capability distinction.
I will check pcie_configure_mps() again and fix it, thanks.

> 
>> +   pcie_set_relaxed_ordering(dev);
>> +   else
>> +

[PATCH v2 2/3] PCI: Enable PCIe Relaxed Ordering if supported

2017-06-02 Thread Ding Tianhong
The PCIe Device Control Register uses bit 4 to indicate whether the
device is permitted to enable relaxed ordering or not. But relaxed
ordering is not safe for some platforms which can only use strong write
ordering, so devices are allowed (but not required) to enable the
relaxed ordering bit by default.

If a platform supports relaxed ordering but does not enable it by
default, enable it in the PCIe configuration. This allows some devices
to send TLPs with the relaxed ordering attribute set, which may
improve performance.

Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/pci.c   | 42 ++
 drivers/pci/probe.c | 11 +++
 include/linux/pci.h |  3 +++
 3 files changed, 56 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5b..f57a374 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4878,6 +4878,48 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
 EXPORT_SYMBOL(pcie_set_mps);
 
 /**
+ * pcie_set_relaxed_ordering - set PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible sets relaxed ordering
+ */
+int pcie_set_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, 
PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_set_relaxed_ordering);
+
+/**
+ * pcie_clear_relaxed_ordering - clear PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * If possible clear relaxed ordering
+ */
+int pcie_clear_relaxed_ordering(struct pci_dev *dev)
+{
+   return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, 
PCI_EXP_DEVCTL_RELAX_EN);
+}
+EXPORT_SYMBOL(pcie_clear_relaxed_ordering);
+
+/**
+ * pcie_get_relaxed_ordering - check PCI Express relaxed ordering bit
+ * @dev: PCI device to query
+ *
+ * Returns true if relaxed ordering has been set
+ */
+int pcie_get_relaxed_ordering(struct pci_dev *dev)
+{
+   u16 v;
+
+   pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v);
+
+   return (v & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
+}
+EXPORT_SYMBOL(pcie_get_relaxed_ordering);
+
+/**
+ * pcie_set_mps - set PCI Express maximum payload size
+/**
  * pcie_get_minimum_link - determine minimum link settings of a PCI device
  * @dev: PCI device to query
  * @speed: storage for minimum speed
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 19c8950..aeb22b5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1701,6 +1701,16 @@ static void pci_configure_extended_tags(struct pci_dev 
*dev)
 PCI_EXP_DEVCTL_EXT_TAG);
 }
 
+static void pci_configure_relaxed_ordering(struct pci_dev *dev)
+{
+   int ret;
+
+   if (dev && (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING))
+   pcie_set_relaxed_ordering(dev);
+   else
+   pcie_clear_relaxed_ordering(dev);
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
struct hotplug_params hpp;
@@ -1708,6 +1718,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
pci_configure_mps(dev);
pci_configure_extended_tags(dev);
+   pci_configure_relaxed_ordering(dev);
 
memset(&hpp, 0, sizeof(hpp));
ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e1e8428..84bd6af 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1105,6 +1105,9 @@ int __pci_enable_wake(struct pci_dev *dev, pci_power_t 
state,
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
+int pcie_set_relaxed_ordering(struct pci_dev *dev);
+int pcie_clear_relaxed_ordering(struct pci_dev *dev);
+int pcie_get_relaxed_ordering(struct pci_dev *dev);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
  bool enable)
-- 
1.9.0




[PATCH v2 3/3] net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-02 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

cxgb4 Ethernet driver now queries Root Complex Port to determine if it can
send TLPs to it with the Relaxed Ordering Attribute set.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e88c180..478f25a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -521,6 +521,7 @@ enum { /* adapter flags */
USING_SOFT_PARAMS  = (1 << 6),
MASTER_PF  = (1 << 7),
FW_OFLD_CONN   = (1 << 9),
+   ROOT_NO_RELAXED_ORDERING = (1 << 10),
 };
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38a5c67..fbfe341 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4628,6 +4628,7 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
 #ifdef CONFIG_PCI_IOV
u32 v, port_vec;
 #endif
+   struct pci_dev *root;
 
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4726,6 +4727,22 @@ static int init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent)
adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
+   /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
+* Ingress Packet Data to Free List Buffers in order to allow for
+* chipset performance optimizations between the Root Complex and
+* Memory Controllers.  (Messages to the associated Ingress Queue
+* notifying new Packet Placement in the Free Lists Buffers will be
+* sent without the Relaxed Ordering Attribute thus guaranteeing that
+* all preceding PCIe Transaction Layer Packets will be processed
+* first.)  But some Root Complexes have various issues with Upstream
+* Transaction Layer Packets with the Relaxed Ordering Attribute set.
+* So we check our Root Complex to see if it's flagged with advice
+* against using Relaxed Ordering.
+*/
+   root = pci_find_pcie_root_port(adapter->pdev);
+   if (pcie_get_relaxed_ordering(root))
+   adapter->flags |= ROOT_NO_RELAXED_ORDERING;
+
spin_lock_init(&adapter->stats_lock);
spin_lock_init(&adapter->tid_release_lock);
spin_lock_init(&adapter->win0_lock);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f05f0d4..ac229a3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2571,6 +2571,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
struct fw_iq_cmd c;
struct sge *s = &adap->sge;
struct port_info *pi = netdev_priv(dev);
+   int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);
 
/* Size needs to be multiple of 16, including status entry. */
iq->size = roundup(iq->size, 16);
@@ -2624,8 +2625,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct 
sge_rspq *iq, bool fwevtq,
 
flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
-FW_IQ_CMD_FL0FETCHRO_F |
-FW_IQ_CMD_FL0DATARO_F |
+FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
+FW_IQ_CMD_FL0DATARO_V(relaxed) |
 FW_IQ_CMD_FL0PADEN_F);
if (cong >= 0)
c.iqns_to_fl0congen |=
-- 
1.9.0




[PATCH v2 1/3] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING

2017-06-02 Thread Ding Tianhong
From: Casey Leedom <lee...@chelsio.com>

The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING indicates that the Relaxed
Ordering Attribute should not be used on Transaction Layer Packets destined
for the PCIe End Node so flagged.  Initially flagged this way are Intel
E5-26xx Root Complex Ports which suffer from a Flow Control Credit
Performance Problem and AMD A1100 ARM ("SEATTLE") Root Complex Ports which
don't obey PCIe 3.0 ordering rules which can lead to Data Corruption.

Signed-off-by: Casey Leedom <lee...@chelsio.com>
Signed-off-by: Ding Tianhong <dingtianh...@huawei.com>
---
 drivers/pci/quirks.c | 38 ++
 include/linux/pci.h  |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 085fb78..58bdd23 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3999,6 +3999,44 @@ static void quirk_tw686x_class(struct pci_dev *pdev)
  quirk_tw686x_class);
 
 /*
+ * Some devices have problems with Transaction Layer Packets with the Relaxed
+ * Ordering Attribute set.  Such devices should mark themselves and other
+ * Device Drivers should check before sending TLPs with RO set.
+ */
+static void quirk_relaxedordering_disable(struct pci_dev *dev)
+{
+   dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING;
+}
+
+/*
+ * Intel E5-26xx Root Complex has a Flow Control Credit issue which can
+ * cause performance problems with Upstream Transaction Layer Packets with
+ * Relaxed Ordering set.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
+ * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex
+ * where Upstream Transaction Layer Packets with the Relaxed Ordering
+ * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering
+ * set.  This is a violation of the PCIe 3.0 Transaction Ordering Rules
+ * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0
+ * November 10, 2010).  As a result, on this platform we can't use Relaxed
+ * Ordering for Upstream TLPs.
+ */
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, 
PCI_CLASS_NOT_DEFINED, 8,
+ quirk_relaxedordering_disable);
+
+/*
  * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same
  * values for the Attribute as were supplied in the header of the
  * corresponding Request, except as explicitly allowed when IDO is used."
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b..e1e8428 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,8 @@ enum pci_dev_flags {
PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
/* Do not use FLR even if device advertises PCI_AF_CAP */
PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+   /* Don't use Relaxed Ordering for TLPs directed at this device */
+   PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
-- 
1.9.0




  1   2   >