[PATCH] PCI: Limit VPD length for megaraid_sas adapter
Resending again. Sorry if it is duplicate. My email client seems to have some issues. Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/pci/quirks.c| 49 +++ include/linux/pci_ids.h | 12 +++ 2 files changed, 61 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..c32cd07 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,55 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. 
+ */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078R, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078DE, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_VERDE_ZCR, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS1078GEN2, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0079GEN2, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0073SKINNY, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_SAS0071SKINNY, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_FUSION, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_PLASMA, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_INVADER, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, + PCI_DEVICE_ID_LSI_FURY, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. 
This problem was initially * observed when a vpd entry was created in sysfs diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d9ba49c..20c5103 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -213,6 +213,18 @@ #define PCI_DEVICE_ID_LSI_SAS1068E 0x0058 #define PCI_DEVICE_ID_LSI_SAS1078 0x0060 +#define PCI_DEVICE_ID_LSI_SAS1078R 0x0060 +#define PCI_DEVICE_ID_LSI_SAS1078DE 0x007C +#define PCI_DEVICE_ID_LSI_VERDE_ZCR 0x0413 +#define PCI_DEVICE_ID_LSI_SAS1078GEN2 0x0078 +#define PCI_DEVICE_ID_LSI_SAS0079GEN2 0x0079 +#define PCI_DEVICE_ID_LSI_SAS0073SKINNY 0x0073 +#define PCI_DEVICE_ID_LSI_SAS0071SKINNY 0x0071 +#define PCI_DEVICE_ID_LSI_FUSION0x005b +#define PCI_DEVICE_ID_LSI_PLASMA0x002f +#define PCI_DEVICE_ID_LSI_INVADER 0x005d +#define PCI_DEVICE_ID_LSI_FURY 0x005f + #define PCI_VENDOR_ID_ATI 0x1002 /* Mach64 */ #define PCI_DEVICE_ID_ATI_688000x4158 -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kern
[PATCH v2] PCI: Limit VPD length for megaraid_sas adapter
Changes since v1 -> v2 Removed the changes in pci_id.h. Kept all the vendor ids in quirks.c Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f739e47 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. 
+ */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] PCI: Limit VPD length for megaraid_sas adapter
On 11/11/2015 1:30 PM, Myron Stowe wrote: > On Wed, Nov 11, 2015 at 8:54 AM, Babu Moger <babu.mo...@oracle.com> wrote: >> Changes since v1 -> v2 >> Removed the changes in pci_id.h. Kept all the vendor >> ids in quirks.c >> >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd >> >> VPD length has been set as 32768 by default. Accessing vpd >> will trigger read/write of 32k. This causes problem as we >> could read data beyond the VPD end tag. Behaviour is un- >> predictable when this happens. I see some other adapter doing >> similar quirks(commit id bffadffd43d438c3143b8d172a463de89345b836) >> >> I see there is an attempt to fix this right way. >> https://patchwork.ozlabs.org/patch/534843/ or >> https://lkml.org/lkml/2015/10/23/97 >> >> Tried to fix it this way, but problem is I dont see the proper >> start/end TAGs(at least for this adapter) at all. The data is >> mostly junk or zeros. This patch fixes the issue by setting the >> vpd length to 0. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> --- >> drivers/pci/quirks.c | 38 ++ >> 1 files changed, 38 insertions(+), 0 deletions(-) >> >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index b03373f..f739e47 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -2123,6 +2123,44 @@ static void >> quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) >> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, >> quirk_via_cx700_pci_parking_caching); >> >> /* >> + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') >> + * will dump 32k of data. The default length is set as 32768. >> + * Reading a full 32k will cause an access beyond the VPD end tag. >> + * The system behaviour at that point is mostly unpredictable. >> + * Also I dont believe vendors have implemented this VPD headers properly. 
>> + * Atleast I dont see it in following megaraid sas controller. >> + * That is why adding the quirk here. >> + */ >> +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) >> +{ >> + if (dev->vpd) >> + dev->vpd->len = 0; >> +} >> + >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, >> + quirk_megaraid_sas_limit_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, >> + quirk_megaraid_sas_limit_vpd); >> + >> +/* >> * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the >> * VPD end tag will hang the device. This problem was initially >> * observed when a vpd entry was created in sysfs >> -- >> 1.7.1 >> > > Just to confirm, I've encountered similar results on a MegaRAID SAS 2208 - Myron, Thanks for confirmation. With most of the devices behaving this way, I feel the default length is set too high. Anyway that is Bjorn's call. For this adapter, I think we should set the length to 0. 
> > $ lspci -vvv -s 02:00.0 > 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 > [Thunderbolt] (rev 05) > Capabilities: [d0] Vital Product Data > Unknown small resource type 00, will not decode more. > > $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | > od -A x -t x1z -v > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0
[PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk for Broadcom 5708S")) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0x80. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Khalid Aziz <khalid.a...@oracle.com> Tested-by: Dmitry Klochkov <dmitry.kloch...@oracle.com> Orabug: 22104511 Changes since v3 -> v4 We found some options of the lspci does not work very well if it cannot find the valid vpd tag(Example command "lspci -s 10:00.0 -vv"). It displays the error message and exits right away. Setting the length back to 0 fixes the problem. Changes since v2 -> v3 Changed the vpd length from 0 to 0x80 which leaves the option open for someone to read first few bytes. Changes since v1 -> v2 Removed the changes in pci_id.h. 
Kept all the vendor ids in quirks.c --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f739e47 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. + * That is why adding the quirk here. + */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* 
* For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Hi Bjorn, My old logs were lost. So, I had to recreate the issue again. So it took sometime. On 12/7/2015 11:29 AM, Bjorn Helgaas wrote: > Hi Babu, > > On Thu, Dec 03, 2015 at 12:25:19PM -0800, Babu Moger wrote: >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd > > What sort of panic is this? Actual panic stack showed total different area. It looked like this. TSTATE: 80e01601 TPC: 007945c8 TNPC: 007945cc Y: Not tainted TPC: <ehci_irq+0x94/0x388> g0: 4000 g1: 084001604020 g2: 084001604024 g3: 0acb g4: 800fe42d0340 g5: 8000291ce000 g6: 800fe42f4000 g7: 03114000 o0: 800fe085d99c o1: 800fe42f4008 o2: 4000 o3: 0001 o4: o5: 0012 sp: 80002047b2b1 ret_pc: 00794540 RPC: <ehci_irq+0xc/0x388> l0: 800fe085d980 l1: c001 l2: 000b l3: 008e7058 l4: 00bd19a8 l5: 00bd6a88 l6: l7: i0: 800fe085d800 i1: 0016 i2: 800c20c007c3 i3: f0265f78 i4: feff4748 i5: feff2ff8 i6: 80002047b3d1 i7: 0077adf0 I7: <usb_hcd_irq+0x38/0xa0> Call Trace: [0077adf0] usb_hcd_irq+0x38/0xa0 [004d122c] handle_irq_event_percpu+0x8c/0x204 [004d13d8] handle_irq_event+0x34/0x60 [004d3998] handle_fasteoi_irq+0xdc/0x164 [004d1178] generic_handle_irq+0x24/0x38 [008dce68] handler_irq+0xb8/0xec [004208b4] tl0_irq5+0x14/0x20 [0042cfac] cpu_idle+0x9c/0x18c [008d2ad0] after_lock_tlb+0x1b4/0x1cc [] (null) While analyzing it from kdump, I saw it stuck in here below. 
PID: 5274 TASK: 800fe1198680 CPU: 0 COMMAND: "cat" #0 [800fe25f6f81] switch_to_pc at 8d725c #1 [800fe25f70e1] pci_user_read_config_word at 6c4698 #2 [800fe25f71a1] pci_vpd_pci22_wait at 6c4710 #3 [800fe25f7261] pci_vpd_pci22_read at 6c4994 #4 [800fe25f7321] pci_read_vpd at 6c3e90 #5 [800fe25f73d1] read_vpd_attr at 6ccc78 #6 [800fe25f7481] read at 5be478 #7 [800fe25f7531] vfs_read at 54fdb0 #8 [800fe25f75e1] sys_read at 54ff10 #9 [800fe25f76a1] linux_sparc_syscall at 4060f4 TSTATE=0x8082000223 TT=0x16d TPC=0xfc0100295e28 TNPC=0xfc0100295e2c r0=0x r1=0x0003 r2=0x0020aec0 r3=0x0020aec4 r4=0x0b00 r5=0x033f r6=0x0001 r7=0xfc0106f0 r24=0x0003 r25=0x0020e000 r26=0x8000 r27=0x r28=0x r29=0x r30=0x07feffb468d1 r31=0x00105d94 > > This seems like a defect in the megaraid hardware or firmware. If the > VPD ROM contains junk, there's no hope that software can read the data > and figure out how much is safe to read. Yes this looks like problem with megaraid hardware. Other day, Myron stowe(myron.st...@gmail.com) reported similar problem with his setup. $ lspci -vvv -s 02:00.0 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 [Thunderbolt] (rev 05) Capabilities: [d0] Vital Product Data Unknown small resource type 00, will not decode more. $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | od -A x -t x1z -v 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< * 007ff0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< 008000 > > I assume VPD is useful for somebody, and I hate to silently disable > the whole thing. We might want to at least log a note about what > we're doing. Sure. Let me know what you think. > > Bjorn > >> VPD length has been set as 32768 by default. Accessing vpd >> will trigger read/write of 32k. This causes problem as we >> could read data beyond the VPD end tag. Behaviour is un- >> predictable when this happens. 
I see some other adapter doing >> similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk >> for Broadcom 5708S")) >> >> I see there is an attempt to fix this right way. >> https://patchwork.ozlabs.org/patch/534843/ or >> https://lkml.org/lkml/2015/10/23/97 >> >> Tried to fix it this way, but problem is I dont see the proper >> start/end TAGs(at least for this adapter) at all. The data is >> mostly junk or zeros. This patch fixes the issue by setting the >> vpd length to 0x80. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Reviewed-by: Khalid Aziz <khalid.a...@ora
[PATCH] drivers/usb: Skip auto handoff for TI and Renesas USB controllers
I have never seen auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/usb/host/pci-quirks.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index f940056..b7ee895 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1003,6 +1003,13 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) ext_cap_offset = xhci_find_next_cap_offset(base, ext_cap_offset); } while (1); + /* Auto handoff never worked for these devices. Force it and continue */ + if (pdev->vendor == PCI_VENDOR_ID_TI || + pdev->vendor == PCI_VENDOR_ID_RENESAS) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + /* If the BIOS owns the HC, signal that the OS wants it, and wait */ if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4] PCI: Limit VPD length for megaraid_sas adapter
Hi Bjorn, Checking again. How about adding some messages in the logs to let user know that vpd has been disabled on this device in case if there is an attempt to access vpd. What do you think?. Thanks Babu On 12/7/2015 5:07 PM, Babu Moger wrote: > Hi Bjorn, > My old logs were lost. So, I had to recreate the issue again. So it took > sometime. > > On 12/7/2015 11:29 AM, Bjorn Helgaas wrote: >> Hi Babu, >> >> On Thu, Dec 03, 2015 at 12:25:19PM -0800, Babu Moger wrote: >>> Reading or Writing of PCI VPD data causes system panic. >>> We saw this problem by running "lspci -vvv" in the beginning. >>> However this can be easily reproduced by running >>> cat /sys/bus/devices/XX../vpd >> >> What sort of panic is this? > > Actual panic stack showed total different area. It looked like this. > > TSTATE: 80e01601 TPC: 007945c8 TNPC: 007945cc Y: > Not tainted > TPC: <ehci_irq+0x94/0x388> > g0: 4000 g1: 084001604020 g2: 084001604024 g3: > 0acb > g4: 800fe42d0340 g5: 8000291ce000 g6: 800fe42f4000 g7: > 03114000 > o0: 800fe085d99c o1: 800fe42f4008 o2: 4000 o3: > 0001 > o4: o5: 0012 sp: 80002047b2b1 ret_pc: > 00794540 > RPC: <ehci_irq+0xc/0x388> > l0: 800fe085d980 l1: c001 l2: 000b l3: > 008e7058 > l4: 00bd19a8 l5: 00bd6a88 l6: l7: > > i0: 800fe085d800 i1: 0016 i2: 800c20c007c3 i3: > f0265f78 > i4: feff4748 i5: feff2ff8 i6: 80002047b3d1 i7: > 0077adf0 > I7: <usb_hcd_irq+0x38/0xa0> > Call Trace: > [0077adf0] usb_hcd_irq+0x38/0xa0 > [004d122c] handle_irq_event_percpu+0x8c/0x204 > [004d13d8] handle_irq_event+0x34/0x60 > [004d3998] handle_fasteoi_irq+0xdc/0x164 > [004d1178] generic_handle_irq+0x24/0x38 > [008dce68] handler_irq+0xb8/0xec > [004208b4] tl0_irq5+0x14/0x20 > [0042cfac] cpu_idle+0x9c/0x18c > [008d2ad0] after_lock_tlb+0x1b4/0x1cc > [] (null) > > > While analyzing it from kdump, I saw it stuck in here below. 
> > PID: 5274 TASK: 800fe1198680 CPU: 0 COMMAND: "cat" > #0 [800fe25f6f81] switch_to_pc at 8d725c > #1 [800fe25f70e1] pci_user_read_config_word at 6c4698 > #2 [800fe25f71a1] pci_vpd_pci22_wait at 6c4710 > #3 [800fe25f7261] pci_vpd_pci22_read at 6c4994 > #4 [800fe25f7321] pci_read_vpd at 6c3e90 > #5 [800fe25f73d1] read_vpd_attr at 6ccc78 > #6 [800fe25f7481] read at 5be478 > #7 [800fe25f7531] vfs_read at 54fdb0 > #8 [800fe25f75e1] sys_read at 54ff10 > #9 [800fe25f76a1] linux_sparc_syscall at 4060f4 > TSTATE=0x8082000223 TT=0x16d TPC=0xfc0100295e28 TNPC=0xfc0100295e2c > r0=0x r1=0x0003 r2=0x0020aec0 > r3=0x0020aec4 r4=0x0b00 r5=0x033f > r6=0x0001 r7=0xfc0106f0 r24=0x0003 > r25=0x0020e000 r26=0x8000 r27=0x > r28=0x r29=0x r30=0x07feffb468d1 > r31=0x00105d94 > > > >> >> This seems like a defect in the megaraid hardware or firmware. If the >> VPD ROM contains junk, there's no hope that software can read the data >> and figure out how much is safe to read. > > Yes this looks like problem with megaraid hardware. > > Other day, Myron stowe(myron.st...@gmail.com) reported similar problem with > his setup. > > $ lspci -vvv -s 02:00.0 > 02:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 2208 > [Thunderbolt] (rev 05) > Capabilities: [d0] Vital Product Data > Unknown small resource type 00, will not decode more. > > $ cat /sys/devices/pci:00/:00:02.2/:02:00.0/vpd | > od -A x -t x1z -v > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< > * > 007ff0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >< > 008000 > > >> >> I assume VPD is useful for somebody, and I hate to silently disable >> the whole thing. We might want to at least log a note about what >> we're doing. > > Sure. Let me know what you think. > > >> >> Bjorn >> >>> VPD length has been set as 32768 by default. Accessing vpd >>> will trigger read/write of 32k. This causes problem as we >>> c
[PATCH v3] PCI: Limit VPD length for megaraid_sas adapter
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd VPD length has been set as 32768 by default. Accessing vpd will trigger read/write of 32k. This causes problem as we could read data beyond the VPD end tag. Behaviour is un- predictable when this happens. I see some other adapter doing similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk for Broadcom 5708S")) I see there is an attempt to fix this right way. https://patchwork.ozlabs.org/patch/534843/ or https://lkml.org/lkml/2015/10/23/97 Tried to fix it this way, but problem is I dont see the proper start/end TAGs(at least for this adapter) at all. The data is mostly junk or zeros. This patch fixes the issue by setting the vpd length to 0x80. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Khalid Aziz <khalid.a...@oracle.com> Changes since v2 -> v3 Changed the vpd length from 0 to 0x80 which leaves the option open for someone to read first few bytes. Changes since v1 -> v2 Removed the changes in pci_id.h. Kept all the vendor ids in quirks.c --- drivers/pci/quirks.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..b8774e2 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,44 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Also I dont believe vendors have implemented this VPD headers properly. + * Atleast I dont see it in following megaraid sas controller. 
+ * That is why adding the quirk here. + */ +static void quirk_megaraid_sas_limit_vpd(struct pci_dev *dev) +{ + if (dev->vpd) + dev->vpd->len = 0x80; +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_megaraid_sas_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_megaraid_sas_limit_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ixgbe: Fix minor typo while freeing IRQ
The array subscript increments after the execution of the statement. So there is no issue here. However it helps to read the code better. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 569cb07..6f4fe66 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3016,7 +3016,7 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) free_irq(entry->vector, q_vector); } - free_irq(adapter->msix_entries[vector++].vector, adapter); + free_irq(adapter->msix_entries[vector].vector, adapter); } /** -- 1.7.1
Re: [PATCHv2 4/4] PCI: Blacklist VPD access for buggy devices
On 2/9/2016 3:07 PM, Bjorn Helgaas wrote: > There seem to be several revs of this patch, and it's hard for me to > keep track of what's current. If you want to update any patch in the > series, please repost the entire series with a new version number. Here is the latest of patch 4/4. https://patchwork.kernel.org/patch/8084221/ I will wait for Hannes's response before re-posting it. Hannes, If you want me to re-post all the series let me know. > > On Wed, Jan 13, 2016 at 12:25:35PM +0100, Hannes Reinecke wrote: >> From: Babu Moger <babu.mo...@oracle.com> >> >> Reading or Writing of PCI VPD data causes system panic. >> We saw this problem by running "lspci -vvv" in the beginning. >> However this can be easily reproduced by running >> cat /sys/bus/devices/XX../vpd >> >> As even a simple read on any VPD data triggers a system >> lockup on certain cards this patch implements a PCI quirk >> to disabling VPD acces altogether by setting the vpd length > > s/acces/access/ > s/vpd/VPD/ > >> to '0'. 
>> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Signed-off-by: Hannes Reinecke <h...@suse.de> >> --- >> drivers/pci/access.c | 5 - >> drivers/pci/quirks.c | 41 + >> 2 files changed, 45 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/pci/access.c b/drivers/pci/access.c >> index 914e023..82f41a8 100644 >> --- a/drivers/pci/access.c >> +++ b/drivers/pci/access.c >> @@ -396,7 +396,7 @@ static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, >> loff_t pos, size_t count, >> if (pos < 0) >> return -EINVAL; >> >> -if (!vpd->valid) { >> +if (!vpd->valid && vpd->base.len > 0) { >> vpd->valid = true; >> vpd->base.len = pci_vpd_pci22_size(dev); >> } >> @@ -459,6 +459,9 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, >> loff_t pos, size_t count >> loff_t end = pos + count; >> int ret = 0; >> >> +if (vpd->base.len == 0) >> +return -EIO; >> + >> if (!vpd->valid) >> return -EAGAIN; >> >> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c >> index 7e32730..af0f8a1 100644 >> --- a/drivers/pci/quirks.c >> +++ b/drivers/pci/quirks.c >> @@ -2123,6 +2123,47 @@ static void >> quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) >> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, >> quirk_via_cx700_pci_parking_caching); >> >> /* >> + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') >> + * will dump 32k of data. The default length is set as 32768. >> + * Reading a full 32k will cause an access beyond the VPD end tag. >> + * The system behaviour at that point is mostly unpredictable. >> + * Apparently, some vendors have not implemented this VPD headers properly. >> + * Adding a generic function disable vpd data for these buggy adapters >> + * Add the DECLARE_PCI_FIXUP_FINAL line below with the specific with >> + * vendor and device of interest to use this quirk. 
>> + */ >> +static void quirk_blacklist_vpd(struct pci_dev *dev) >> +{ >> +if (dev->vpd) { >> +dev->vpd->len = 0; >> +dev_warn(>dev, "PCI vpd access has been disabled due to >> firmware bug\n"); > > "PCI" is superfluous and "VPD" should be capitalized. > >> +} >> +} >> + >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, >> +quirk_blacklist_vpd); >> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, >> +quirk_blacklist_vpd); >> + >> +/* >> * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the >> * VPD end tag will hang the device. This problem was initially >> * observed when a vpd entry was created in sysfs >> -- >> 1.8.5.6 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-pci" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices
On 1/21/2016 9:47 AM, jordan_hargr...@dell.com wrote: >> From: Babu Moger [babu.mo...@oracle.com] >> Sent: Tuesday, January 19, 2016 2:39 PM >> To: Hargrave, Jordan; bhelg...@google.com >> Cc: linux-...@vger.kernel.org; linux-kernel@vger.kernel.org; >> alexander.du...@gmail.com; h...@suse.de; mkube...@suse.com; >> shane.seym...@hpe.com; myron.st...@gmail.com >> Subject: Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices >> >> Hi Jordan, >> >> On 1/19/2016 9:22 AM, jordan_hargr...@dell.com wrote: >>> From: Babu Moger [babu.mo...@oracle.com] >>> Sent: Monday, January 11, 2016 4:49 PM >>> To: bhelg...@google.com >>> Cc: linux-...@vger.kernel.org; linux-kernel@vger.kernel.org; >>> alexander.du...@gmail.com; h...@suse.de; mkube...@suse.com; >>> shane.seym...@hpe.com; myron.st...@gmail.com; >>> venkatkumar.duvv...@avago.com; Hargrave, Jordan >>> Subject: Re: [PATCH RFC] pci: Blacklist vpd access for buggy devices >>> >>> Sorry. Missed Jordan. >>> >>> On 1/11/2016 3:13 PM, Babu Moger wrote: >>>> Reading or Writing of PCI VPD data causes system panic. >>>> We saw this problem by running "lspci -vvv" in the beginning. >>>> However this can be easily reproduced by running >>>> cat /sys/bus/devices/XX../vpd >>>> >>>> VPD length has been set as 32768 by default. Accessing vpd >>>> will trigger read/write of 32k. This causes problem as we >>>> could read data beyond the VPD end tag. Behaviour is un- >>>> predictable when this happens. I see some other adapter doing >>>> similar quirks(commit bffadffd43d4 ("PCI: fix VPD limit quirk >>>> for Broadcom 5708S")) >>>> >>>> I see there is an attempt to fix this right way. >>>> https://patchwork.ozlabs.org/patch/534843/ or >>>> https://lkml.org/lkml/2015/10/23/97 >>>> >>>> Tried to fix it this way, but problem is I dont see the proper >>>> start/end TAGs(at least for this adapter) at all. The data is >>>> mostly junk or zeros. This patch fixes the issue by setting the >>>> vpd length to 0x80. 
>>>> >>>> Also look at the threds >>>> >>>> https://lkml.org/lkml/2015/11/10/557 >>>> https://lkml.org/lkml/2015/12/29/315 >>>> >>>> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >>>> --- >>>> >>>> NOTE: >>>> Jordan, Are you sure all the devices in PCI_VENDOR_ID_ATHEROS and >>>> PCI_VENDOR_ID_ATTANSIC have this problem. You have used PCI_ANY_ID. >>>> I felt it is too broad. Can you please check. >>>> >>> >>> I don't actually have that hardware, it was a bugfix for biosdevname for >>> RedHat. We were getting >>> 'BUG: soft lockup - CPU#0 stuck for 23s!' when attempting to read the vpd >>> area. >>> >>> Certainly 0x1969:0x1026 experienced this. >> >> Ok. Thanks. I will update the patch 4/4. >> > > Thanks! I also found 1969:2062. Maybe best to just block everything in > drivers/net/ethernet/atheros/ Ok. I will update the patch.. > > atl1c: > static const struct pci_device_id atl1c_pci_tbl[] = { > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1C)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L2C)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L2C_B)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L2C_B2)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L1D)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATHEROS_L1D_2_0)}, > /* required last entry */ > { 0 } > }; > > atl1e > static const struct pci_device_id atl1e_pci_tbl[] = { > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1E)}, > {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, 0x1066)}, > /* required last entry */ > { 0 } > }; > >>> >>> 09:00.0 Ethernet controller: Atheros Communications AR8121/AR8113/AR8114 >>> Gigabit or Fast Ethernet (rev b0) >>> Subsystem: Atheros Communications AR8121/AR8113/AR8114 Gigabit or >>> Fast Ethernet >>> Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- >>> Stepping- SERR- FastB2B- DisINTx+ >>> Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- >>> SERR- >> Latency: 0, 
Cache Line
[PATCH v4 4/4] pci: Blacklist vpd access for buggy devices
Reading or Writing of PCI VPD data causes system panic. We saw this problem by running "lspci -vvv" in the beginning. However this can be easily reproduced by running cat /sys/bus/devices/XX../vpd As even a simple read on any VPD data triggers a system lockup on certain cards this patch implements a PCI quirk to disable VPD access altogether by setting the vpd length to '0'. Added all the PCI_VENDOR_ID_ATTANSIC variants. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Hannes Reinecke <h...@suse.de> Signed-off-by: Jordan Hargrave <jordan_hargr...@dell.com> --- drivers/pci/access.c |5 - drivers/pci/quirks.c | 43 +++ 2 files changed, 47 insertions(+), 1 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 914e023..82f41a8 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -396,7 +396,7 @@ static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, loff_t pos, size_t count, if (pos < 0) return -EINVAL; - if (!vpd->valid) { + if (!vpd->valid && vpd->base.len > 0) { vpd->valid = true; vpd->base.len = pci_vpd_pci22_size(dev); } @@ -459,6 +459,9 @@ static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count loff_t end = pos + count; int ret = 0; + if (vpd->base.len == 0) + return -EIO; + if (!vpd->valid) return -EAGAIN; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b03373f..f0007e9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2123,6 +2123,49 @@ static void quirk_via_cx700_pci_parking_caching(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0x324e, quirk_via_cx700_pci_parking_caching); /* + * A read/write to sysfs entry ('/sys/bus/pci/devices//vpd') + * will dump 32k of data. The default length is set as 32768. + * Reading a full 32k will cause an access beyond the VPD end tag. + * The system behaviour at that point is mostly unpredictable. + * Apparently, some vendors have not implemented this VPD headers properly. 
+ * Adding a generic function disable vpd data for these buggy adapters + * Add the DECLARE_PCI_FIXUP_FINAL line below with the specific with + * vendor and device of interest to use this quirk. + */ +static void quirk_blacklist_vpd(struct pci_dev *dev) +{ + if (dev->vpd) { + dev->vpd->len = 0; + dev_warn(>dev, "PCI vpd access has been disabled due to firmware bug\n"); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0060, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x007c, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0413, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0078, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0079, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0073, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x0071, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005b, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x002f, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, + quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, + quirk_blacklist_vpd); + +/* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the * VPD end tag will hang the device. This problem was initially * observed when a vpd entry was created in sysfs -- 1.7.1
[PATCH RFC] PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. === mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception === Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this I have introduced weak function pci_init_archdata in SR-IOV code. Sparc will copy the archdata from PF to VF. Also verified the fix. Please review. Let me know if there is a better way to fix this. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- arch/sparc/kernel/pci.c |7 +++ drivers/pci/iov.c |5 + include/linux/pci.h |1 + 3 files changed, 13 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index badf095..0fc774a 100644 --- a/a
Re: [PATCH v2] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
It's been a while since I submitted this patch. Ping 1. On 1/8/2016 9:39 AM, Babu Moger wrote: > I have never seen auto handoff working on TI and RENESAS cards. > Eventually, we force handoff. This code forces the handoff > unconditionally. It saves 5 seconds boot time for each card. > > Signed-off-by: Babu Moger <babu.mo...@oracle.com> > --- > v2: > Made changes per comments from Greg KH. > Extra space removal in assignment > Added both vendor and device id checks. > > drivers/usb/host/pci-quirks.c |8 > 1 files changed, 8 insertions(+), 0 deletions(-) > > diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c > index f940056..0915f44 100644 > --- a/drivers/usb/host/pci-quirks.c > +++ b/drivers/usb/host/pci-quirks.c > @@ -1003,6 +1003,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev > *pdev) > ext_cap_offset = xhci_find_next_cap_offset(base, > ext_cap_offset); > } while (1); > > + /* Auto handoff never worked for these devices. Force it and continue */ > + if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) || > + (pdev->vendor == PCI_VENDOR_ID_RENESAS > + && pdev->device == 0x0014)) { > + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; > + writel(val, base + ext_cap_offset); > + } > + > /* If the BIOS owns the HC, signal that the OS wants it, and wait */ > if (val & XHCI_HC_BIOS_OWNED) { > writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); >
[PATCH v2] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. arch/sparc/kernel/pci.c | 15 +++ 1 f
[PATCH v3] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. v3: Fixed the compile error repor
[PATCH v4] sparc/PCI: Fix for panic while enabling SR-IOV
We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: <dma_supported+0x20/0x80> I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [00725f14] local_pci_probe+0x34/0xa0 [00726028] pci_call_probe+0xa8/0xe0 [00726310] pci_device_probe+0x50/0x80 [0079f700] really_probe+0x140/0x420 [0079fa24] driver_probe_device+0x44/0xa0 [0079fb5c] __device_attach+0x3c/0x60 [0079d85c] bus_for_each_drv+0x5c/0xa0 [0079f588] device_attach+0x88/0xc0 [0071acd0] pci_bus_add_device+0x30/0x80 [00736090] virtfn_add.clone.1+0x210/0x360 [007364a4] sriov_enable+0x2c4/0x520 [0073672c] pci_enable_sriov+0x2c/0x40 [104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb5c]: __device_attach+0x3c/0x60 Caller[0079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[0079f588]: device_attach+0x88/0xc0 Caller[0071acd0]: pci_bus_add_device+0x30/0x80 Caller[00736090]: virtfn_add.clone.1+0x210/0x360 Caller[007364a4]: sriov_enable+0x2c4/0x520 Caller[0073672c]: pci_enable_sriov+0x2c/0x40 Caller[104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[104c5f90]: __mlx4_init_one+0x410/0x500 
[mlx4_core] Caller[104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[00725f14]: local_pci_probe+0x34/0xa0 Caller[00726028]: pci_call_probe+0xa8/0xe0 Caller[00726310]: pci_device_probe+0x50/0x80 Caller[0079f700]: really_probe+0x140/0x420 Caller[0079fa24]: driver_probe_device+0x44/0xa0 Caller[0079fb08]: __driver_attach+0x88/0xa0 Caller[0079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[0079f29c]: driver_attach+0x1c/0x40 Caller[0079e35c]: bus_add_driver+0x17c/0x220 Caller[007a02d4]: driver_register+0x74/0x120 Caller[007263fc]: __pci_register_driver+0x3c/0x60 Caller[104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> --- v2: Removed RFC. Made changes per comments from Ethan Zhao. Now the changes are only in Sparc specific code. Removed the changes from driver/pci. Implemented already defined weak function pcibios_add_device in arch/sparc/kernel/pci.c to initialize sriov archdata. v3: Fixed the compile error reported in kbuild t
Re: [PATCH v4] sparc/PCI: Fix for panic while enabling SR-IOV
Hi David, On 3/29/2016 7:57 PM, David Miller wrote: > From: Babu Moger <babu.mo...@oracle.com> > Date: Thu, 24 Mar 2016 13:02:22 -0700 > >> We noticed this panic while enabling SR-IOV in sparc. > ... >> SR-IOV code looks for arch specific data while enabling >> VFs. When VF device is added, driver probe function makes set >> of calls to initialize the pci device. Because the VF device is >> added different way than the normal PF device(which happens via >> of_create_pci_dev for sparc), some of the arch specific initialization >> does not happen for VF device. That causes panic when archdata is >> accessed. >> >> To fix this, I have used already defined weak function >> pcibios_setup_device to copy archdata from PF to VF. >> Also verified the fix. >> >> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> >> Reviewed-by: Ethan Zhao <ethan.z...@oracle.com> > > Looks good, applied and queued up for -stable, thanks. Thanks. > > Just a note, I am assuming that the VFs are not instantiated in the > device tree. Because when you just memcpy the arch data over from the > PF, one thing we end up doing is using the device node of the PF. No. VFs are not instantiated in device tree(/proc/device-tree) > > I slightly cringed at the memcpy, because at least one of these > pointers are to objects which are reference counted, the OF device. > > Generally speaking we don't really support hot-plug for OF probed > devices, but if we did all of the device tree pointers have to be > refcounted properly. > > So in the long term that whole sequence where we go: > > struct dev_archdata *sd; > ... > sd = >dev.archdata; > sd->iommu = pbm->iommu; > sd->stc = >stc; > sd->host_controller = pbm; > sd->op = op = of_find_device_by_node(node); > sd->numa_node = pbm->numa_node; > > should be encapsulated into a helper function, and both > of_create_pci_dev() and this new pcibios_setup_device() can > invoke it. > Yes. Agree. 
We need to refactor the whole of_create_pci_dev path to support hot-plug for the long term. I will start looking at it. For now we should be fine with the current patch. Thanks.
[PATCH] ixgbevf: Fix relaxed order settings in VF driver
Current code writes the tx/rx relaxed order without reading it first. This can lead to unintended consequences as we are forcibly writing other bits. We noticed this problem while testing VF driver on sparc. Relaxed order settings for rx queue were all messed up which was causing performance drop with VF interface. Fixed it by reading the registers first and setting the specific bit of interest. With this change we are able to match the bandwidth equivalent to PF interface. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |9 +++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ea14c0..51abff1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1545,6 +1545,7 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { + u32 regval; struct ixgbe_hw *hw = >hw; u64 tdba = ring->dma; int wait_loop = 10; @@ -1565,8 +1566,10 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFTDWBAL(reg_idx), 0); /* enable relaxed ordering */ + regval = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(reg_idx)); + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(reg_idx), - (IXGBE_DCA_TXCTRL_DESC_RRO_EN | + (regval | IXGBE_DCA_TXCTRL_DESC_RRO_EN | IXGBE_DCA_TXCTRL_DATA_RRO_EN)); /* reset head and tail pointers */ @@ -1734,6 +1737,7 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { + u32 regval; struct ixgbe_hw *hw = >hw; u64 rdba = ring->dma; u32 rxdctl; @@ -1749,8 +1753,9 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ring->count * sizeof(union 
ixgbe_adv_rx_desc)); /* enable relaxed ordering */ + regval = IXGBE_READ_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx)); IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), - IXGBE_DCA_RXCTRL_DESC_RRO_EN); + regval | IXGBE_DCA_RXCTRL_DESC_RRO_EN); /* reset head and tail pointers */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(reg_idx), 0); -- 1.7.1
Re: [PATCH] ixgbevf: Fix relaxed order settings in VF driver
Hi Alex, On 4/21/2016 2:22 PM, Alexander Duyck wrote: > On Thu, Apr 21, 2016 at 11:13 AM, Alexander Duyck > <alexander.du...@gmail.com> wrote: >> On Thu, Apr 21, 2016 at 10:21 AM, Babu Moger <babu.mo...@oracle.com> wrote: >>> Current code writes the tx/rx relaxed order without reading it first. >>> This can lead to unintended consequences as we are forcibly writing >>> other bits. >> >> The consequences were very much intended as there are situations where >> enabling relaxed ordering can lead to data corruption. >> >>> We noticed this problem while testing VF driver on sparc. Relaxed >>> order settings for rx queue were all messed up which was causing >>> performance drop with VF interface. >> >> What additional relaxed ordering bits are you enabling on Sparc? I'm >> assuming it is just the Rx data write back but I want to verify. >> >>> Fixed it by reading the registers first and setting the specific >>> bit of interest. With this change we are able to match the bandwidth >>> equivalent to PF interface. >>> >>> Signed-off-by: Babu Moger <babu.mo...@oracle.com> >> >> Fixed is a relative term here since you are only chasing performance >> from what I can tell. We need to make certain that this doesn't break >> the driver on any other architectures by leading to things like data >> corruption. >> >> - Alex > > It occurs to me that what might be easier is instead of altering the > configuration on all architectures you could instead wrap the write so > that on SPARC you include the extra bits you need and on all other > architectures you leave the write as-is similar to how the code in the > ixgbe_start_hw_gen2 only clears the bits if CONFIG_SPARC is not > defined. Here are the default values that I see when testing on Sparc. 
Default tx value 0x2a00 All below 3 set #define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ #define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ #define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ I am not too worried about tx values. I can keep it as it is. It did not seem to cause any problems right now. Default rx value 0xb200 All below 3 set plus one more #define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ #define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ #define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ Is there a reason to disable IXGBE_DCA_RXCTRL_DATA_WRO_EN and IXGBE_DCA_RXCTRL_HEAD_WRO_EN for RX? I would think CONFIG_SPARC should be our last option. What do you think? > > - Alex >
[PATCH v2] ixgbevf: Change the relaxed order settings in VF driver for sparc
We noticed performance issues with VF interface on sparc compared to PF. Setting the RX to IXGBE_DCA_RXCTRL_DATA_WRO_EN brings it on far with PF. Also this matches to the default sparc setting in PF driver. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- v2: Alexander had concerns about this negativily affecting other architectures. Added CONFIG_SPARC check so this should not affect other architectures. drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ea14c0..3596e0b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1748,9 +1748,15 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(reg_idx), ring->count * sizeof(union ixgbe_adv_rx_desc)); +#ifndef CONFIG_SPARC /* enable relaxed ordering */ IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), IXGBE_DCA_RXCTRL_DESC_RRO_EN); +#else + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx), + IXGBE_DCA_RXCTRL_DESC_RRO_EN | + IXGBE_DCA_RXCTRL_DATA_WRO_EN); +#endif /* reset head and tail pointers */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(reg_idx), 0); -- 1.7.1
Filesystem slow write performance
xa0 Aug 3 12:18:55 build-t7 kernel: [005586a8] filemap_fdatawait_range+0xc8/0x140 Aug 3 12:18:55 build-t7 kernel: [005587fc] filemap_write_and_wait_range+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [100e4258] ext3_sync_file+0x58/0x2c0 [ext3] Aug 3 12:18:55 build-t7 kernel: [005f54d8] vfs_fsync_range+0x38/0xa0 Aug 3 12:18:55 build-t7 kernel: [005f555c] vfs_fsync+0x1c/0x40 Aug 3 12:18:55 build-t7 kernel: [005f55a8] do_fsync+0x28/0x60 Aug 3 12:18:55 build-t7 kernel: [005f55f0] SyS_fdatasync+0x10/0x40 I am not an expert on this area. Note that I am bit behind on the kernel version(but not a whole lot). Working on to recreate this with latest upstream kernel. Looked at the upstream patches and tried most of the upstream patches which appear to be related and nothing helped. Problem is fairly easy to reproduce. Let me know if you want me to try something. Thanks Babu Moger
Re: Filesystem slow write performance
I wasn't able to repro this with mainline. Sorry for the noise. On 8/6/2016 1:49 PM, Babu Moger wrote: Hi, Seeing some terrible write performance with ext3/4 writes. Reads are fine. I have a created loop device and mounted as ext3(tried ext4 also). Here is iostat output. await time is pretty high most of the time. Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 133.00 0.00 1064.00 8.00 124.14 835.61 7.52 100.00 dm-0 0.00 0.000.00 132.00 0.00 1056.00 8.00 1.007.52 7.52 99.20 Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 94.00 0.00 752.00 8.00 124.18 901.02 10.64 100.00 dm-0 0.00 0.000.00 92.00 0.00 736.00 8.00 1.02 11.09 10.87 100.00 Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util loop0 0.00 0.000.00 132.00 0.00 1056.00 8.00 124.56 1329.30 7.58 100.00 dm-0 0.00 0.000.00 141.00 0.00 1128.00 8.00 1.087.72 7.06 99.60 Tags output [root@build-t7 0]# cat tags nr_tags=128, reserved_tags=0, bits_per_word=5 nr_free=128, nr_reserved=0 active_queues=0 Here is the output of "echo w > /proc/sysrq-trigger" when the problem happens. 
Aug 3 12:18:55 build-t7 kernel: kworker/u512:0 D 009defd4 0 6 2 0x0600 Aug 3 12:18:55 build-t7 kernel: Workqueue: writeback bdi_writeback_workfn (flush-7:0) Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009defd4] schedule_timeout+0x134/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [006be64c] bt_get+0x10c/0x1e0 Aug 3 12:18:55 build-t7 kernel: [006be7f4] blk_mq_get_tag+0x74/0xe0 Aug 3 12:18:55 build-t7 kernel: [006ba570] __blk_mq_alloc_request+0x10/0x180 Aug 3 12:18:55 build-t7 kernel: [006bb9f4] blk_mq_map_request+0x1d4/0x260 Aug 3 12:18:55 build-t7 kernel: [006bbd40] blk_sq_make_request+0x60/0x300 Aug 3 12:18:55 build-t7 kernel: [006afa58] generic_make_request+0x78/0xe0 Aug 3 12:18:55 build-t7 kernel: [006afb44] submit_bio+0x84/0x160 Aug 3 12:18:55 build-t7 kernel: [005f7cb4] _submit_bh+0x174/0x200 Aug 3 12:18:55 build-t7 kernel: [005f7d54] submit_bh+0x14/0x40 Aug 3 12:18:55 build-t7 kernel: [005fc248] __block_write_full_page.clone.0+0x2c8/0x500 Aug 3 12:18:55 build-t7 kernel: [005fc620] block_write_full_page+0xa0/0xe0 Aug 3 12:18:55 build-t7 kernel: [100e7d94] ext3_writeback_writepage+0x134/0x200 [ext3] Aug 3 12:18:55 build-t7 kernel: [00562798] __writepage+0x18/0x60 Aug 3 12:18:55 build-t7 kernel: loop0 D 009deff4 0 15632 2 0x01000400 Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009deff4] schedule_timeout+0x154/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [009dcdbc] bit_wait_io+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [009dd1c4] __wait_on_bit+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [0055719c] wait_on_page_bit+0x7c/0xa0 Aug 3 12:18:55 build-t7 kernel: [005586a8] filemap_fdatawait_range+0xc8/0x140 Aug 3 12:18:55 build-t7 kernel: [005587fc] 
filemap_write_and_wait_range+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [00558a58] __generic_file_write_iter+0xb8/0x140 Aug 3 12:18:55 build-t7 kernel: [00558bac] generic_file_write_iter+0xcc/0x1e0 Aug 3 12:18:55 build-t7 kernel: [007ca000] lo_rw_aio+0x180/0x240 Aug 3 12:18:55 build-t7 kernel: [007ca260] do_req_filebacked+0x1a0/0x1c0 Aug 3 12:18:55 build-t7 kernel: [007ca2b4] loop_queue_work+0x34/0x80 Aug 3 12:18:55 build-t7 kernel: [00491944] kthread_worker_fn+0x44/0x180 Aug 3 12:18:55 build-t7 kernel: [00491c4c] kthread+0xac/0xe0 Aug 3 12:18:55 build-t7 kernel: [00406184] ret_from_fork+0x1c/0x2c Aug 3 12:18:55 build-t7 kernel: livecd-creator D 009deff4 0 15627 2676 0x308000103000400 Aug 3 12:18:55 build-t7 kernel: Call Trace: Aug 3 12:18:55 build-t7 kernel: [009dc9e4] schedule+0x24/0xa0 Aug 3 12:18:55 build-t7 kernel: [009deff4] schedule_timeout+0x154/0x220 Aug 3 12:18:55 build-t7 kernel: [009dc044] io_schedule_timeout+0x84/0x100 Aug 3 12:18:55 build-t7 kernel: [009dcdbc] bit_wait_io+0x3c/0x80 Aug 3 12:18:55 build-t7 kernel: [000
Re: [PATCH v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
CCing Dave. Dave, Please ack it if it looks good. Thanks. On 9/27/2016 2:33 PM, Babu Moger wrote: Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES 32768UL #define MAX_LOCKDEP_CHAINS_BITS 16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[];
Re: [PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
On 9/28/2016 3:39 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 12:33:27PM -0700, Babu Moger wrote: This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> You forgot to Cc Dave, and since you're touching sparc I need an Ack from him before I can queue this. Dave, Can you please take a look at the patch. Please ack it if it looks good. Thanks Dave? --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b0960..8da321c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,6 +44,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cab7405..597e589 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1084,6 +1084,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
Re: [PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
Dave, Gentle reminder to review this patch. Thanks On 9/30/2016 12:19 AM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Thu, 29 Sep 2016 08:53:24 -0500 On 9/28/2016 3:39 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 12:33:27PM -0700, Babu Moger wrote: This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> You forgot to Cc Dave, and since you're touching sparc I need an Ack from him before I can queue this. Dave, Can you please take a look at the patch. Please ack it if it looks good. I am travelling and will look at it when I get a chance.
Re: [PATCH 1/2] watchdog: Introduce update_arch_nmi_watchdog
On 10/6/2016 11:34 PM, Sam Ravnborg wrote: On Thu, Oct 06, 2016 at 03:16:42PM -0700, Babu Moger wrote: Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new function update_arch_nmi_watchdog which can be used to enable/disable architecture specific NMI watchdog handlers. Also exposes watchdog_enabled variable outside so that arch specific nmi watchdogs can use it to implement enalbe/disable behavour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eea..01b4830 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +extern unsigned long watchdog_enabled; The extern is within an #ifdef, but the definition later is valid alway. So extern definition should be outside the #ifdef to match the actual implementation. Ok. Sure. To manipulate / read watchdog_enabled two constants are used: NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED Sure. I will bring these definitions to nmi.h from watchdog.c They should be visible too, so uses do not fall into the trap and uses constants (like in patch 2). Will re-post v2 version with these changes. Thanks for the comments. Sam
[PATCH 0/2] Introduce update_arch_nmi_watchdog for arch specific handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new function update_arch_nmi_watchdog which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. Babu Moger (2): watchdog: Introduce update_arch_nmi_watchdog sparc: Implement update_arch_nmi_watchdog arch/sparc/kernel/nmi.c | 26 ++ include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 3 files changed, 40 insertions(+), 3 deletions(-)
[PATCH 1/2] watchdog: Introduce update_arch_nmi_watchdog
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new function update_arch_nmi_watchdog which can be used to enable/disable architecture specific NMI watchdog handlers. Also exposes watchdog_enabled variable outside so that arch specific nmi watchdogs can use it to implement enalbe/disable behavour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h |1 + kernel/watchdog.c | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eea..01b4830 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +extern unsigned long watchdog_enabled; extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..1ac2814 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,16 +46,21 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; +/* + * Implemented by arch specific handlers if it defines CONFIG_HAVE_NMI_WATCHDOG + */ +void __weak update_arch_nmi_watchdog(void) {} + #ifdef CONFIG_SMP int __read_mostly 
sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; @@ -842,6 +847,11 @@ static int proc_watchdog_update(void) int err = 0; /* +* Enable/Disable arch specific nmi watchdogs if there is one +*/ + update_arch_nmi_watchdog(); + + /* * Watchdog threads won't be started if they are already active. * The 'watchdog_running' variable in watchdog_*_all_cpus() takes * care of this. If those threads are already active, the sample -- 1.7.1
[PATCH 2/2] sparc: Implement update_arch_nmi_watchdog
Implement function update_arch_nmi_watchdog to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specinf nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 26 ++ 1 files changed, 26 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..27c4e18 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,8 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -270,3 +274,25 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +#ifdef CONFIG_LOCKUP_DETECTOR +void update_arch_nmi_watchdog(void) +{ + if (atomic_read(_active) < 0) { + printk(KERN_WARNING + "NMI watchdog cannot be enabled or disabled\n"); + return; + } + + /* +* Check for bit 0. Bit 0 is dedicated for hard lockup detector or +* arch specific nmi and bit 1 for the soft lockup detector. We +* are interested only in bit 0 here. +*/ + if (watchdog_enabled & 1) + on_each_cpu(start_nmi_watchdog, NULL, 1); + else + on_each_cpu(stop_nmi_watchdog, NULL, 1); + +} +#endif -- 1.7.1
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 2:12 AM, Peter Zijlstra wrote: On Thu, Sep 22, 2016 at 11:43:34AM -0700, Babu Moger wrote: These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Why make this more complicated? There's absolutely no upside to this change as far as I can see. Peter, What do you mean? Revert the commit 1413c038933? Right now, I cannot boot my setup after enabling lockdep. How do you think we can handle this?
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 9:34 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 09:04:42AM -0500, Babu Moger wrote: On 9/23/2016 2:12 AM, Peter Zijlstra wrote: On Thu, Sep 22, 2016 at 11:43:34AM -0700, Babu Moger wrote: These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Why make this more complicated? There's absolutely no upside to this change as far as I can see. Peter, What do you mean? I mean I see no point to the patches you send. Revert the commit 1413c038933? Nah, why would I? Right now, I cannot boot my setup after enabling lockdep. How do you think we can handle this? Why can't you boot? You have that little memories? 4MB doesn't seem like a worthwhile amount of memory. Also, you didn't say. This seems a somewhat crucial point. Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. In any case, maybe invert this, add make it depend on CONFIG_BASE_SMALL, since this really only matters for really dinky systems. Sure. Will use CONFIG_BASE_SMALL and re-post the patches. Thanks
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 10:04 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 09:50:52AM -0500, Babu Moger wrote: Why can't you boot? You have that little memories? 4MB doesn't seem like a worthwhile amount of memory. Also, you didn't say. This seems a somewhat crucial point. Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. This stuff should be in .bss not .data. It should not affect the static size at all. Or am I misunderstanding things? Here it is. $ ./scripts/bloat-o-meter vmlinux.lockdep.small vmlinux.lockdep.big add/remove: 0/0 grow/shrink: 5/0 up/down: 4653056/0 (4653056) function old new delta stack_trace 2097152 4194304 +2097152 lock_chains 1048576 2097152 +1048576 list_entries 1048576 2097152 +1048576 chain_hlocks 327680 655360 +327680 chainhash_table 131072 262144 +131072 Total: Before=21046200, After=25699256, chg 22.00%
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 10:40 AM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 10:15:46AM -0500, Babu Moger wrote: Correct, We can't boot with lockdep. Sorry I did not make that clear. We have a limit on static size of the kernel. This stuff should be in .bss not .data. It should not affect the static size at all. Or am I misunderstanding things? Here it is. $ ./scripts/bloat-o-meter vmlinux.lockdep.small vmlinux.lockdep.big What does bloat-o-meter have to do with things? The static image size is not dependent on .bss, right? Peter, We checked again. Yes, It goes in .bss section. But in sparc we have to fit .text, .data, .bss in 7 permanent TLBs(that is totally 28MB). It was fine so far. But the commit 1413c0389333 ("lockdep: Increase static allocations") added extra 4MB which makes it go beyond 28MB. That is causing system boot up problems in sparc. Yes. We know it. This is a limitation. Changing this limit in our hardware is a much bigger change which we cannot address right away. So, we are trying to come up with a solution which can work for all. I will re-post the patches with CONFIG_BASE_SMALL option if there is no objections. CCing David Miller and Rob Gardner. They might be able to explain more if you have any more questions. Here is the discussion thread if you guys want to look at history. https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1237642.html
Re: [PATCH 0/2] Adjust lockdep static allocations
On 9/23/2016 3:17 PM, Peter Zijlstra wrote: On Fri, Sep 23, 2016 at 02:57:39PM -0500, Babu Moger wrote: We checked again. Yes, It goes in .bss section. But in sparc we have to fit .text, .data, .bss in 7 permanent TLBs(that is totally 28MB). It was fine so far. But the commit 1413c0389333 ("lockdep: Increase static allocations") added extra 4MB which makes it go beyond 28MB. That is causing system boot up problems in sparc. *sigh*, why didn't you start with that :/ Yes. We know it. This is a limitation. Changing this limit in our hardware is a much bigger change which we cannot address right away. So, we are trying to come up with a solution which can work for all. I will re-post the patches with CONFIG_BASE_SMALL option if there is no objections. OK, so double check BASE_SMALL doesn't imply other things you cannot live with, Sparc64 isn't a dinky system. If BASE_SMALL works for you then good, otherwise do a PROVE_LOCKING_SMALL symbol that is not user selectable and have SPARC select that. Use the invisible Help for that symbol to explain all this again. Thanks. Will work on it. CCing David Miller and Rob Gardner. They might be able to explain more if you have any more questions. Nah, I think I remember enough of how the Sparc MMU works to see reason.
[PATCH 0/2] Adjust lockdep static allocations
These patches adjust the static allocations for lockdep data structures used for debugging locking correctness. The current code reserves about 4MB extra space for these data structures. Most of the configurations do not need these many data structures. While testing, I have not seen it go beyond 20% of already reserved entries. $grep "lock-classes" /proc/lockdep_stats lock-classes: 1560 [max: 8191] Reserving even more space seems unreasonable. So, keeping the default entries small as before the Commit 1413c0389333 ("lockdep: Increase static allocations"). Added new CONFIG_PROVE_LOCKING_PLUS in case someone needs more entries to debug their large configuration. Patch 1 : Adjusts the sizes based on the new config parameter patch 2 : Adds new config parameter Babu Moger (2): lockdep: Keep the default static allocations small config: Add new CONFIG_PROVE_LOCKING_PLUS kernel/locking/lockdep_internals.h | 14 +++--- lib/Kconfig.debug | 10 ++ 2 files changed, 21 insertions(+), 3 deletions(-)
[PATCH 1/2] lockdep: Keep the default static allocations small
The Commit 1413c0389333 ("lockdep: Increase static allocations") doubled the static allocation for lockdep. The size is unusually high and not required for majority of the configurations. This could cause problems to some environments with limited memory configurations. We are already seeing issues on our sparc configuration where kernel fails to boot when lockdep feature is enabled. This patch keeps the default size to same as before Commit 1413c0389333 ("lockdep: Increase static allocations"). Adding the new config parameter CONFIG_PROVE_LOCKING_PLUS in case someone needs to enable more static space for lockdep entries, lock chains and stack traces to debug large configurations. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 14 +++--- 1 files changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..47336a6 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -54,18 +54,26 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#ifdef CONFIG_PROVE_LOCKING_PLUS #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#else +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) + extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH 2/2] config: Add new CONFIG_PROVE_LOCKING_PLUS
Adding the new config parameter CONFIG_PROVE_LOCKING_PLUS in case someone needs to enable more static space for lockdep entries, lock chains and stack traces to debug large configurations. The default size is kept small to cover majority of the configs. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- lib/Kconfig.debug | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b9cfdbf..d5d995e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1070,6 +1070,16 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_PLUS + bool "Reserve extra space for prove locking correctness" + depends on PROVE_LOCKING + default n + help +This feature reserves more space for lockdep entries, lock chains +and stack traces to debug large configurations. This could add +about additional 4MB static memory to kernel size. This is not +suitable for embedded or other limited memory configurations. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
Re: [PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
On 9/27/2016 10:43 AM, Sam Ravnborg wrote: On Tue, Sep 27, 2016 at 09:51:40AM -0500, Babu Moger wrote: On 9/27/2016 6:40 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 06:46:25AM +0200, Sam Ravnborg wrote: Since this is only relevant for sparc, and for sparc this is "select"ed, then there is limited/no gain having this as a visible menu config option. How about adding just a simple non-visible config symbol: config PROVE_LOCKING_SMALL bool The nice help text can be added to the H file, and the select can be move to the sparc/Kconfig file where it really belongs. Yes, this should not be user selectable. I don't mind the help being here though. How about this? Moved everything to arch/sparc/Kconfig.debug. It may be not useful to have help in config file as it is not visible. Lets have some explanation in .h file. I will send v3 version if you all agree. = diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6db35fb..67e58a1 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -21,4 +21,9 @@ config FRAME_POINTER depends on MCOUNT default y +config PROVE_LOCKING_SMALL + bool + depends on PROVE_LOCKING && SPARC + default y + endmenu The idea is to have the SPAC specific stuff in arch/sparc/Kconfig, and not scattered in Kconfig files all over the tree. Therefore drop the "depends". In sparc/Kconfig you then just do: config SPARC select PROVE_LOCKING_SMALL if PROVE_LOCKING The if part is likely not needed as PROVE_LOCKING_SMALL will be ignored unless PROVE_LOCKING is enabled. Sure. thanks. Here it is below. I will re-post v3. 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index cde1a62..353731f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba2b0c8..3ba1665 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1008,6 +1008,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT Sam
Re: [PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
On 9/27/2016 6:40 AM, Peter Zijlstra wrote: On Tue, Sep 27, 2016 at 06:46:25AM +0200, Sam Ravnborg wrote: Since this is only relevant for sparc, and for sparc this is "select"ed, then there is limited/no gain having this as a visible menu config option. How about adding just a simple non-visible config symbol: config PROVE_LOCKING_SMALL bool The nice help text can be added to the H file, and the select can be move to the sparc/Kconfig file where it really belongs. Yes, this should not be user selectable. I don't mind the help being here though. How about this? Moved everything to arch/sparc/Kconfig.debug. It may be not useful to have help in config file as it is not visible. Lets have some explanation in .h file. I will send v3 version if you all agree. = diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6db35fb..67e58a1 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -21,4 +21,9 @@ config FRAME_POINTER depends on MCOUNT default y +config PROVE_LOCKING_SMALL + bool + depends on PROVE_LOCKING && SPARC + default y + endmenu
[PATCH v3 0/2] Adjust lockdep static allocations for sparc
These patches limit the static allocations for lockdep data structures used for debugging locking correctness. For sparc, all the kernel's code, data, and bss, must have locked translations in the TLB so that we don't get TLB misses on kernel code and data. Current sparc chips have 8 TLB entries available that may be locked down, and with a 4mb page size, this gives a maximum of 32MB. With PROVE_LOCKING we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required size limit. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v2-> v3: Some more comments from Sam Ravnborg and Peter Zijlstra. Defined PROVE_LOCKING_SMALL as invisible and moved the selection to arch/sparc/Kconfig. v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined arch/sparc/Kconfig |1 + kernel/locking/lockdep_internals.h | 20 +--- lib/Kconfig.debug |3 +++ 3 files changed, 21 insertions(+), 3 deletions(-)
[PATCH v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b0960..8da321c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,6 +44,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cab7405..597e589 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1084,6 +1084,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[PATCH v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL is defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH v2 2/2] lockdep: Keep the static allocations small for PROVE_LOCKING_SMALL
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL is defined. This is used for sparc. This config variable is disabled by default. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/locking/lockdep_internals.h | 13 ++--- 1 files changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..7d364a6 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -54,18 +54,25 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH v2 0/2] Adjust lockdep static allocations for sparc
These patches limit the static allocations for lockdep data structures used for debugging locking correctness. This is required for sparc as it requires .text, .data and .bss to fit in required 32MB limit for the kernel. Right now, with PROVE_LOCKING enabled we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required space limit. This is only visible for sparc. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Add new CONFIG_PROVE_LOCKING_SMALL lockdep: Keep the static allocations small for PROVE_LOCKING_SMALL kernel/locking/lockdep_internals.h | 13 ++--- lib/Kconfig.debug | 17 + 2 files changed, 27 insertions(+), 3 deletions(-)
[PATCH v2 1/2] config: Add new CONFIG_PROVE_LOCKING_SMALL
Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc. This feature limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systms have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system bootup issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc. so that kernel fits in required size limit. This is only visible for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- lib/Kconfig.debug | 17 + 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b9cfdbf..c79de25 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1035,6 +1035,7 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS + select PROVE_LOCKING_SMALL if SPARC default n help This feature enables the kernel to prove that all locking @@ -1070,6 +1071,22 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool "Limit the space for prove locking correctness" + depends on PROVE_LOCKING && SPARC + help +This feature limits the space used for "Lock debugging: prove +locking correctness" by about 4MB. In sparc system, all the +kernel's code, data, and bss, must have locked translations in +the TLB so that it does not hit TLB misses. The current sparc +chips have 8 TLB entries available that may be locked down, and +with a 4mb page size, this gives a maximum of 32mb of memory for +the kernel size. With PROVE_LOCKING feature, the kernel size could +grow beyond this limit and causing system bootup issues. With +this option, kernel limits the size of the entries of lock_chains, +stack_trace etc. to debug PROVE_LOCKING so that kernel size fits +in 32MB. This is only visible for SPARC. 
+ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[RFC PATCH 2/4] watchdog: Move shared definitions to nmi.h
Move shared macros and definitions to nmi.h so that watchdog.c, watchdog_hld.c or any other architecture specific handler can use those definitions. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h | 19 +++ kernel/watchdog.c | 25 - 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a78c35c..0ea0a38 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,23 @@ #include #include +/* + * The run state of the lockup detectors is controlled by the content of the + * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - + * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. + * + * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' + * are variables that are only used as an 'interface' between the parameters + * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The + * 'watchdog_thresh' variable is handled differently because its value is not + * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' + * is equal zero. + */ +#define NMI_WATCHDOG_ENABLED_BIT 0 +#define SOFT_WATCHDOG_ENABLED_BIT 1 +#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) +#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) + /** * touch_nmi_watchdog - restart NMI watchdog timeout. 
* @@ -91,6 +108,8 @@ static inline bool trigger_single_cpu_backtrace(int cpu) extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; +extern unsigned long watchdog_enabled; +extern DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a88e179..4ea7752 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -26,29 +26,12 @@ #include #include -/* - * The run state of the lockup detectors is controlled by the content of the - * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. - * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. 
- */ -#define NMI_WATCHDOG_ENABLED_BIT 0 -#define SOFT_WATCHDOG_ENABLED_BIT 1 -#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) -#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) - static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; @@ -96,7 +79,7 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static unsigned long soft_lockup_nmi_warn; -- 1.7.1
[RFC PATCH 1/4] watchdog: Remove hardlockup handler references
Separate hardlockup code from watchdog.c. It is mostly straight forward. Remove everything inside CONFIG_HARDLOCKUP_DETECTORS. This code will go to file watchdog_hld.c. We also define weak handlers watchdog_nmi_enable and watchdog_nmi_disable. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 251 ++--- 1 files changed, 7 insertions(+), 244 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..a88e179 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,7 +24,6 @@ #include #include -#include #include /* @@ -100,50 +99,8 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -#endif static unsigned long soft_lockup_nmi_warn; -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -#ifdef CONFIG_HARDLOCKUP_DETECTOR -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). 
- */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); -#endif - unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -264,43 +221,12 @@ void touch_all_softlockup_watchdogs(void) wq_watchdog_touch(-1); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -void touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. -*/ - raw_cpu_write(watchdog_nmi_touch, true); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -#endif - void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -/* watchdog detector functions */ -static bool is_hardlockup(void) -{ - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return true; - - __this_cpu_write(hrtimer_interrupts_saved, hrint); - return false; -} -#endif - static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -313,78 +239,18 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR - -static struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, -}; - -/* Callback function for perf 
event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, -struct perf_sample_data *data, -struct pt_regs *regs) -{ - /* Ensure the watchdog never gets throttled */ - event->hw.interrupts = 0; - - if (__this_cpu_read(watchdog_nmi_touch) == true) { - __this_cpu_write(watchdog_nmi_touch, false); - return; - } - - /* check for a hardlockup -* This is done by making sure our timer interrupt -* is incrementing. The timer interrupt should have -* fired multiple times before we overflow'd. If it hasn't -* then this is a goo
[RFC PATCH 3/4] watchdog: Move hardlockup detector to separate file
Move hardlockup detector code to watchdog_hld.c. Also update the makefile accordigly. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/Makefile |1 + kernel/watchdog_hld.c | 238 + 2 files changed, 239 insertions(+), 0 deletions(-) create mode 100644 kernel/watchdog_hld.c diff --git a/kernel/Makefile b/kernel/Makefile index eb26e12..314e7d6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c new file mode 100644 index 000..cd690fb --- /dev/null +++ b/kernel/watchdog_hld.c @@ -0,0 +1,238 @@ +/* + * Detect hard and soft lockups on a system + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * Note: Most of this code is borrowed heavily from the original softlockup + * detector, so thanks to Ingo for the initial implementation. + * Some chunks also taken from the old x86-specific nmi watchdog code, thanks + * to those contributors as well. + */ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(bool, hard_watchdog_warn); +static DEFINE_PER_CPU(bool, watchdog_nmi_touch); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); + +/* boot commands */ +/* + * Should we panic when a soft-lockup or hard-lockup occurs: + */ +unsigned int __read_mostly hardlockup_panic = + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +static unsigned long hardlockup_allcpu_dumped; +/* + * We may not want to enable hard lockup detection by default in all cases, + * for example when running the kernel as a guest on a hypervisor. 
In these + * cases this function can be called to disable hard lockup detection. This + * function should only be executed once by the boot processor before the + * kernel command line parameters are parsed, because otherwise it is not + * possible to override this in hardlockup_panic_setup(). + */ +void hardlockup_detector_disable(void) +{ + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; +} + +static int __init hardlockup_panic_setup(char *str) +{ + if (!strncmp(str, "panic", 5)) + hardlockup_panic = 1; + else if (!strncmp(str, "nopanic", 7)) + hardlockup_panic = 0; + else if (!strncmp(str, "0", 1)) + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + else if (!strncmp(str, "1", 1)) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + return 1; +} +__setup("nmi_watchdog=", hardlockup_panic_setup); + +void touch_nmi_watchdog(void) +{ + /* +* Using __raw here because some code paths have +* preemption enabled. If preemption is enabled +* then interrupts should be enabled too, in which +* case we shouldn't have to worry about the watchdog +* going off. 
+*/ + raw_cpu_write(watchdog_nmi_touch, true); + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +/* watchdog detector functions */ +static bool is_hardlockup(void) +{ + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; +} + +static struct perf_event_attr wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +/* Callback function for perf event subsystem */ +static void watchdog_overflow_callback(struct perf_event *event, +struct perf_sample_data *data, +struct pt_regs *regs) +{ + /* Ensure the watchdog never gets throttled */ + event->hw.interrupts = 0; + + if (__this_cpu_read(watchdog_nmi_touch) == true) { + __this_cpu_write(watchdog_nmi_touch, false); + return; + } + + /* check for a hardlockup +* This is done by making sure our timer interrupt +* is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't +* then this is a good indication the cpu is stuck +*/ + if (is_hardlockup()) { + int this_cpu = smp_processor_id(); + struct pt_regs *regs = get_irq_regs(); + + /* only print hardlockups once */ + if (__this_cpu_read(hard_watchdog_warn)
[RFC PATCH 4/4] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
Implement functions watchdog_nmi_enable and watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..95e73c6 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ static int __init check_nmi_watchdog(void) void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[RFC PATCH 0/4] Clean up watchdog handlers
This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Don, On 10/17/2016 12:31 PM, Don Zickus wrote: On Thu, Oct 13, 2016 at 01:38:01PM -0700, Babu Moger wrote: Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Hi Babu, This patch tested fine on my x86 box and I am ok with the changes. I do have one small cosmetic request below for a failure path. Other than that I will give my ack. Yes. I am testing these changes. If everything goes as expected, I will post v3 version tomorrow. Thanks Babu Cheers, Don Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 65 +++- 1 files changed, 44 insertions(+), 21 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..d1e84e6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? 
*/ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. -*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); In the arch_watchdog_nmi_enable code is a pr_info on failure pr_info("Shutting down hard lockup detector on all cpus\n"); that should be moved to below.. @@ -658,7 +642,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +660,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. 
+ */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +770,40 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. +*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); moved to here: pr_info("Shutting down hard lockup det
[PATCH v3 2/2] sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Implement functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..b55d518 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ static int panic_on_timeout; */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int arch_watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void arch_watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH v3 0/2] Introduce arch specific nmi enable, disable handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be implemented in arch specific handlers. If you think there is a better way to do this, please advise. Tested on sparc. Compile tested on x86. v3: Made one more change per Don Zickus comments. Moved failure path messages into generic code inside watchdog_nmi_enable. Also added matching prints in sparc to warn about the failure. v2: a) Sam Ravnborg's comments about making the definitions visible. With the new approach we don't need those definitions (NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED, etc.) outside watchdog.c. So no action. b) Made changes per Don Zickus comments. Don, I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. I feel this should have very little impact on the races you are concerned about. Please take a look. Feel free to suggest. Patch2 changes: I had to introduce a new variable nmi_init_done to synchronize watchdog thread and kernel init thread. v1: Initial version. 
Discussion thread here http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1245427.html Babu Moger (2): watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 +- kernel/watchdog.c | 69 +++--- 2 files changed, 89 insertions(+), 24 deletions(-)
[PATCH v3 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 69 +++- 1 files changed, 46 insertions(+), 23 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..2d0765b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. 
-*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -645,8 +629,6 @@ static int watchdog_nmi_enable(unsigned int cpu) pr_err("disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); - pr_info("Shutting down hard lockup detector on all cpus\n"); - return PTR_ERR(event); /* success path */ @@ -658,7 +640,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +658,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +768,42 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. 
+*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + pr_info("Shutting down hard lockup detector on all cpus\n"); + + return err; + } + + return 0; +} + static int update_watchdog_all_cpus(void) { int ret; -- 1.7.1
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
On 10/24/2016 10:19 AM, Don Zickus wrote: On Fri, Oct 21, 2016 at 04:50:21PM -0500, Babu Moger wrote: Don, On 10/21/2016 2:19 PM, Andrew Morton wrote: On Fri, 21 Oct 2016 11:11:14 -0400 Don Zickus <dzic...@redhat.com> wrote: On Thu, Oct 20, 2016 at 08:25:27PM -0700, Andrew Morton wrote: On Thu, 20 Oct 2016 12:14:14 -0400 Don Zickus <dzic...@redhat.com> wrote: -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ This is a strange way of using __weak. Take a look at (one of many examples) kernel/module.c:module_alloc(). We simply provide a default implementation and some other compilation unit can override (actually replace) that at link time. No strange ifdeffing needed. Yeah, this is mostly because of how we enable the hardlockup detector. Some arches use the perf hw and enable CONFIG_HARDLOCKUP_DETECTOR. Other arches just use their own variant of nmi and set CONFIG_HAVE_NMI_WATCHDOG and the rest of the arches do not use this. So the thought was if CONFIG_HARDLOCKUP_DETECTOR use that implementation, everyone else use the __weak version. Then the arches like sparc can override the weak version with their own nmi enablement. I don't know how to represent those 3 states correctly and the above is what we end up with. Is there a suitable site where we could capture these considerations in a code comment? Hi Andrew, I am not sure I understand your question. When you say 'site', are you referring to the kernel/watchdog.c file? Yes, somewhere in there I guess. 
The problem with this sort of thing is that the implementation is splattered over multiple places in one file or in several files so there's no clear place to document what's happening. But I think this situation *should* be documented somewhere. Or maybe that just isn't worthwhile - feel free to disagree! The other approach that might help de-clutter this file, is to pull out the HARDLOCKUP_DETECTOR changes (as they are arch specific) and move it to say kernel/watchdog_hw_ld.c. Then all the nmi hooks in kernel/watchdog.c can be __weak and overridden by the kernel_watchdog_hw_ld.c file or the sparc files. This would leave kernel/watchdog.c with just a framework and the arch-agnostic softlockup detector. Probably easier to read and digest. Don, Yes. I am fine with your idea. Let me know if you need any help here. If you want I can start working this cleanup myself. I might take sometime as I need to spend sometime understanding the whole watchdog stuff first. If you have already started working on this then I will let you continue. Hi Babu, Feel free to start looking at it. I am trying to wrap up a couple of things here and will only be able to little poke at it the next couple of days. But for the most part you might be able to rip out anything with CONFIG_HARDLOCKUP_DETECTOR and put it into another file. Then just clean up the pieces. Don. Sure. I have started on this. Will send RFC version sometime this week. Cheers, Don Well, it depends how the code ends up looking. It's best to separate functional changes from cleanups. Generally I think it's best to do "cleanup comes first", because it's then simpler to revert the functional change if it has problems. Plus people are more *interested* in the functional change so it's best to have that at top-of-tree.
Re: [PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
On 10/25/2016 1:51 AM, Mathias Nyman wrote: On 24.10.2016 17:52, Babu Moger wrote: On 10/24/2016 5:54 AM, Yoshihiro Shimoda wrote: Hi, From: Mathias Nyman Sent: Monday, October 24, 2016 6:58 PM On 22.10.2016 01:25, Babu Moger wrote: Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Do the Renesas and TI controllers still advertise the extended capability for the handoff? (XHCI_EXT_CAPS_LEGACY) I don't see this capability. Here is lspci output. It's not a PCI capability, it's an xHCI Extended Capability. If the capability is supported, and handoff fails, then quirk_usb_handoff_xhci() will print "xHCI BIOS handoff failed (BIOS bug ?)" Yes. I see these messages. After this patch I don't see these messages. Further on in the same function we wait for the "controller not ready" bit in the status register to clear. If that times out, it prints out: "xHCI HW not ready after 5 sec (HC bug?) status" No. I didn't see these messages. Do you see any of these two messages in the log? -Mathias
[PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- v2: Made changes per comments from Greg KH. Extra space removal in assignment Added both vendor and device id checks. Resending the patch. Original discussion here. https://marc.info/?t=14522116207=1=4 drivers/usb/host/pci-quirks.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 35af362..31c9502 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -996,6 +996,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) } val = readl(base + ext_cap_offset); + /* Auto handoff never worked for these devices. Force it and continue */ + if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) || + (pdev->vendor == PCI_VENDOR_ID_RENESAS +&& pdev->device == 0x0014)) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + /* If the BIOS owns the HC, signal that the OS wants it, and wait */ if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); -- 1.7.1
Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Don, On 10/21/2016 2:19 PM, Andrew Morton wrote: On Fri, 21 Oct 2016 11:11:14 -0400 Don Zickuswrote: On Thu, Oct 20, 2016 at 08:25:27PM -0700, Andrew Morton wrote: On Thu, 20 Oct 2016 12:14:14 -0400 Don Zickus wrote: -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ This is a strange way of using __weak. Take a look at (one of many examples) kernel/module.c:module_alloc(). We simply provide a default implementation and some other compilation unit can override (actually replace) that at link time. No strange ifdeffing needed. Yeah, this is mostly because of how we enable the hardlockup detector. Some arches use the perf hw and enable CONFIG_HARDLOCKUP_DETECTOR. Other arches just use their own variant of nmi and set CONFIG_HAVE_NMI_WATCHDOG and the rest of the arches do not use this. So the thought was if CONFIG_HARDLOCKUP_DETECTOR use that implementation, everyone else use the __weak version. Then the arches like sparc can override the weak version with their own nmi enablement. I don't know how to represent those 3 states correctly and the above is what we end up with. Is there a suitable site where we could capture these considerations in a code comment? Hi Andrew, I am not sure I understand your question. When you say 'site', are you referring to the kernel/watchdog.c file? Yes, somewhere in there I guess. The problem with this sort of thing is that the implementation is splattered over multiple places in one file or in several files so there's no clear place to document what's happening. But I think this situation *should* be documented somewhere. Or maybe that just isn't worthwhile - feel free to disagree! 
The other approach that might help de-clutter this file, is to pull out the HARDLOCKUP_DETECTOR changes (as they are arch specific) and move it to say kernel/watchdog_hw_ld.c. Then all the nmi hooks in kernel/watchdog.c can be __weak and overridden by the kernel_watchdog_hw_ld.c file or the sparc files. This would leave kernel/watchdog.c with just a framework and the arch-agnostic softlockup detector. Probably easier to read and digest. Don, Yes. I am fine with your idea. Let me know if you need any help here. If you want I can start working this cleanup myself. I might take sometime as I need to spend sometime understanding the whole watchdog stuff first. If you have already started working on this then I will let you continue. Well, it depends how the code ends up looking. It's best to separate functional changes from cleanups. Generally I think it's best to do "cleanup comes first", because it's then simpler to revert the functional change if it has problems. Plus people are more *interested* in the functional change so it's best to have that at top-of-tree.
Re: [PATCH v2 RESEND] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers
On 10/24/2016 5:54 AM, Yoshihiro Shimoda wrote: Hi, From: Mathias Nyman Sent: Monday, October 24, 2016 6:58 PM On 22.10.2016 01:25, Babu Moger wrote: Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Do the Renesas and TI controllers still advertise the extended capability for the handoff? (XHCI_EXT_CAPS_LEGACY) I don't see this capability. Here is lspci output. # lspci -s 0009:01:00.0 -vvv 0009:01:00.0 USB controller: Texas Instruments TUSB73x0 SuperSpeed USB 3.0 xHCI Host Controller (rev 02) (prog-if 30 [XHCI]) Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx+ Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Latency: 0, Cache Line Size: 64 bytes Interrupt: pin A routed to IRQ 000e Region 0: Memory at 1 (64-bit, non-prefetchable) [size=64K] Region 2: Memory at 10001 (64-bit, non-prefetchable) [size=8K] Region 4: [virtual] Memory at fffdfdc0 (32-bit, non-prefetchable) Region 5: [virtual] Memory at fffdfdc0 (32-bit, non-prefetchable) [virtual] Expansion ROM at fffdfdc0 [disabled] Capabilities: [40] Power Management version 3 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold-) Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME- Capabilities: [48] MSI: Enable- Count=1/8 Maskable- 64bit+ Address: Data: Capabilities: [70] Express (v2) Endpoint, MSI 00 DevCap: MaxPayload 1024 bytes, PhantFunc 0, Latency L0s unlimited, L1 unlimited ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- DevCtl: Report errors: Correctable- Non-Fatal- Fatal- Unsupported- RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- MaxPayload 256 bytes, MaxReadReq 512 bytes DevSta: CorrErr+ UncorrErr- FatalErr- UnsuppReq+ AuxPwr- TransPend- LnkCap: Port #0, Speed 5GT/s, Width x1, ASPM L0s L1, Latency L0 <2us, L1 <64us ClockPM+ Surprise- 
LLActRep- BwNot- LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk- ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- LnkSta: Speed 5GT/s, Width x1, TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- DevCap2: Completion Timeout: Not Supported, TimeoutDis+, LTR-, OBFF Not Supported DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR-, OBFF Disabled LnkCtl2: Target Link Speed: 5GT/s, EnterCompliance- SpeedDis- Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS- Compliance De-emphasis: -6dB LnkSta2: Current De-emphasis Level: -3.5dB, EqualizationComplete-, EqualizationPhase1- EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest- Capabilities: [c0] MSI-X: Enable+ Count=8 Masked- Vector table: BAR=2 offset= PBA: BAR=2 offset=1000 Capabilities: [100 v2] Advanced Error Reporting UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol- CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+ CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+ AERCap: First Error Pointer: 00, GenCap+ CGenEn- ChkCap+ ChkEn- Capabilities: [150 v1] Device Serial Number 08-00-28-00-00-20-00-00 Kernel driver in use: xhci_hcd Is this some known issue with these vendors controllers? Is there some documentation about this, errata or anything? Adding Yoshihiro Shimoda, he might know about the Renesas controller. Thank you for adding me on this email. However, unfortunately I don't know the detail about Renesas PCIe xHCI controllers. (I know the xHCI controller of R-Car SoCs for now.) Best regards, Yoshihiro Shimoda
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into issues with your tests. I will start working on the review comments. Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into problems with your tests. I will start working on the comments. Thanks Babu Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [RFC PATCH 0/4] Clean up watchdog handlers
On 11/1/2016 8:20 AM, Don Zickus wrote: On Mon, Oct 31, 2016 at 04:30:59PM -0500, Babu Moger wrote: On 10/31/2016 4:00 PM, Don Zickus wrote: On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here is the previous discussion http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, I finally got some cycles to poke at this today. Good work. A couple of suggestions. For bisectability, I am thinking patch2 should be first and patch1 and patch3 should be combined. Also watchdog_hld.c is going to need up top: #define pr_fmt(fmt) "NMI watchdog: " fmt otherwise the error messages miss the header. Though I don't think watchdog.c and watchdog_hld.c should have the same header. A good solution isn't coming to me right now. I will try to run some tests on this tomorrow. Don, Thanks for the feedback. Let me know if you run into problems with your tests. Hi Babu, My tests passed. I just have to tweak the expected output lines as they constantly change. :-( I am going to play with different config options to see if things break from a compile perspective. Don, Great. Thanks for the update. I had couple of compilation issues with different config options. 1. 
drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uni processor config. Thinking of moving the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. 2. kernel/built-in.o: In function `watchdog_overflow_callback': >> watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. Will post the v2 version soon with all the comments included. Thanks Babu I will start working on the comments. Great. Cheers, Don Thanks Babu Cheers, Don Babu Moger (4): watchdog: Remove hardlockup handler references watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector in separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 19 kernel/Makefile |1 + kernel/watchdog.c | 276 ++- kernel/watchdog_hld.c | 238 5 files changed, 312 insertions(+), 266 deletions(-) create mode 100644 kernel/watchdog_hld.c
Re: [PATCH v2 0/3] Clean up watchdog handlers
On 11/4/2016 11:25 AM, Don Zickus wrote: On Tue, Nov 01, 2016 at 02:13:43PM -0700, Babu Moger wrote: This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here are our previous discussions http://www.spinics.net/lists/sparclinux/msg16543.html http://www.spinics.net/lists/sparclinux/msg16441.html Hi Babu, Thanks for the patches. It passes my panic/reboot testing. The patches look good for now. Though this change has me thinking about other cleanup changes I can make on top of this. But I am going to hold off for now until we are sure nothing really broke. As this should be a straight forward split. The only odd thing for me is I am having trouble disabling CONFIG_HARDLOCKUP_DETECTOR. For some reason def_bool y, is forcing the option on despite my repeated attempts to disable it. I had to rename the option to do some test compiling and verify it doesn't regress when disabled. Probably my environment.. Don, You are welcome. Thanks for your feedback to resolve this. Thanks for the work Babu! Acked-by: Don Zickus <dzic...@redhat.com> v2: Addressed few comments from Don Zickus. 1. Took care of bisectability issue. Previous patch2 is patch1 now. Combined patch 1 and 3. Patch 4 is now patch 3. 2. Added pr_fmt back in watchdog_hld.c 3. Tweaked the file headers for watchdog.c and watchdog_hld.c. 4. Took care of couple of config compile issues. 
drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uniprocessor config. Moved the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. is_hardlockup is kind of generic for the most part. kernel/built-in.o: In function `watchdog_overflow_callback': watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. v1: Initial version Babu Moger (3): watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector to separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 24 kernel/Makefile | 1 + kernel/watchdog.c | 270 +++ kernel/watchdog_hld.c | 227 +++ 5 files changed, 310 insertions(+), 256 deletions(-) create mode 100644 kernel/watchdog_hld.c
[PATCH v2 0/3] Clean up watchdog handlers
This is an attempt to cleanup watchdog handlers. Right now, kernel/watchdog.c implements both softlockup and hardlockup detectors. Softlockup code is generic. Hardlockup code is arch specific. Some architectures don't use hardlockup detectors. They use their own watchdog detectors. To make both these combination work, we have numerous #ifdefs in kernel/watchdog.c. We are trying here to make these handlers independent of each other. Also provide an interface for architectures to implement their own handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined as weak such that architectures can override its definitions. Thanks to Don Zickus for his suggestions. Here are our previous discussions http://www.spinics.net/lists/sparclinux/msg16543.html http://www.spinics.net/lists/sparclinux/msg16441.html v2: Addressed few comments from Don Zickus. 1. Took care of bisectability issue. Previous patch2 is patch1 now. Combined patch 1 and 3. Patch 4 is now patch 3. 2. Added pr_fmt back in watchdog_hld.c 3. Tweaked the file headers for watchdog.c and watchdog_hld.c. 4. Took care of couple of config compile issues. drivers/edac/edac_device.o:(.discard+0x0): multiple definition of `__pcpu_unique_hrtimer_interrupts' drivers/edac/edac_mc.o:(.discard+0x0): first defined here This was a problem with uni processor config. Moved the definition of hrtimer_interrupts and is_hardlockup into watchdog.c as softlockup code does most of the work here. is_hardlockup kind of generic most part. kernel/built-in.o: In function `watchdog_overflow_callback': watchdog_hld.c:(.text+0x56940): undefined reference to `sysctl_hardlockup_all_cpu_backtrace' Moved this definition to nmi.h. 
v1: Initial version Babu Moger (3): watchdog: Move shared definitions to nmi.h watchdog: Move hardlockup detector to separate file sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable arch/sparc/kernel/nmi.c | 44 - include/linux/nmi.h | 24 kernel/Makefile | 1 + kernel/watchdog.c | 270 +++ kernel/watchdog_hld.c | 227 +++ 5 files changed, 310 insertions(+), 256 deletions(-) create mode 100644 kernel/watchdog_hld.c
[PATCH v2 1/3] watchdog: Move shared definitions to nmi.h
Move shared macros and definitions to nmi.h so that watchdog.c, new file watchdog_hld.c or any other architecture specific handler can use those definitions. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- include/linux/nmi.h | 24 kernel/watchdog.c | 28 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a78c35c..aacca82 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,23 @@ #include #include +/* + * The run state of the lockup detectors is controlled by the content of the + * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - + * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. + * + * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' + * are variables that are only used as an 'interface' between the parameters + * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The + * 'watchdog_thresh' variable is handled differently because its value is not + * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' + * is equal zero. + */ +#define NMI_WATCHDOG_ENABLED_BIT 0 +#define SOFT_WATCHDOG_ENABLED_BIT 1 +#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) +#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) + /** * touch_nmi_watchdog - restart NMI watchdog timeout. 
* @@ -91,9 +108,16 @@ static inline bool trigger_single_cpu_backtrace(int cpu) extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; +extern unsigned long watchdog_enabled; extern unsigned long *watchdog_cpumask_bits; +#ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; +#else +#define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 +#endif +extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..0424301 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,29 +27,12 @@ #include #include -/* - * The run state of the lockup detectors is controlled by the content of the - * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. - * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. 
- */ -#define NMI_WATCHDOG_ENABLED_BIT 0 -#define SOFT_WATCHDOG_ENABLED_BIT 1 -#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) -#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) - static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else -static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif int __read_mostly nmi_watchdog_enabled; int __read_mostly soft_watchdog_enabled; @@ -59,9 +42,6 @@ #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#else -#define sysctl_softlockup_all_cpu_backtrace 0 -#define sysctl_hardlockup_all_cpu_backtrace 0 #endif static struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(_cpumask); @@ -289,7 +269,7 @@ void touch_softlockup_watchdog_sync(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR /* watchdog detector functions */ -static bool is_hardlockup(void) +bool is_hardlockup(void) { unsigned long hrint = __this_cpu_read(hrtimer_interrupts); -- 1.7.1
[PATCH v2 3/3] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
Implement functions watchdog_nmi_enable and watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 44 +++- 1 files changed, 43 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..95e73c6 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ static int __init check_nmi_watchdog(void) void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_warn("NMI watchdog cannot be enabled or disabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void watchdog_nmi_disable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) + pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); + else + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH RESEND v3 1/2] config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc
This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sam Ravnborg <s...@ravnborg.org> --- arch/sparc/Kconfig |1 + lib/Kconfig.debug |3 +++ 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b23c76b..a85e51d 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b01e547..a6c8db1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1085,6 +1085,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- 1.7.1
[PATCH RESEND v3 2/2] lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined
Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Acked-by: Sam Ravnborg <s...@ravnborg.org> --- kernel/locking/lockdep_internals.h | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24..c2b8849 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,6 +46,14 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. * @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES16384UL +#define MAX_LOCKDEP_CHAINS_BITS15 +#define MAX_STACK_TRACE_ENTRIES262144UL +#else #define MAX_LOCKDEP_ENTRIES32768UL #define MAX_LOCKDEP_CHAINS_BITS16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- 1.7.1
[PATCH RESEND v3 0/2] Adjust lockdep static allocations for sparc
Looks like these patches are lost in the mix. Resending with following note. Dave, This requires your Ack as it touches sparc. Peter is waiting for your Ack to queue it. Here is our previous discussion. http://marc.info/?t=14750048631=1=2 These patches limit the static allocations for lockdep data structures used for debugging locking correctness. For sparc, all the kernel's code, data, and bss, must have locked translations in the TLB so that we don't get TLB misses on kernel code and data. Current sparc chips have 8 TLB entries available that may be locked down, and with a 4mb page size, this gives a maximum of 32MB. With PROVE_LOCKING we could go over this limit and cause system boot-up problems. These patches limit the static allocations so that everything fits in current required size limit. patch 1 : Adds new config parameter CONFIG_PROVE_LOCKING_SMALL Patch 2 : Adjusts the sizes based on the new config parameter v2-> v3: Some more comments from Sam Ravnborg and Peter Zijlstra. Defined PROVE_LOCKING_SMALL as invisible and moved the selection to arch/sparc/Kconfig. v1-> v2: As suggested by Peter Zijlstra, keeping the default as is. Introduced new config variable CONFIG_PROVE_LOCKING_SMALL to handle sparc specific case. v0: Initial revision. Babu Moger (2): config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined arch/sparc/Kconfig |1 + kernel/locking/lockdep_internals.h | 20 +--- lib/Kconfig.debug |3 +++ 3 files changed, 21 insertions(+), 3 deletions(-)
[PATCH v2 2/3] watchdog: Move hardlockup detector to separate file
Separate hardlockup code from watchdog.c and move it to watchdog_hld.c. It is mostly straight forward. Remove everything inside CONFIG_HARDLOCKUP_DETECTORS. This code will go to file watchdog_hld.c. Also update the makefile accordigly. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/Makefile |1 + kernel/watchdog.c | 242 ++-- kernel/watchdog_hld.c | 227 ++ 3 files changed, 239 insertions(+), 231 deletions(-) create mode 100644 kernel/watchdog_hld.c diff --git a/kernel/Makefile b/kernel/Makefile index eb26e12..314e7d6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0424301..d4b0fa0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,7 +24,6 @@ #include #include -#include #include static DEFINE_MUTEX(watchdog_proc_mutex); @@ -80,50 +79,9 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -#endif static unsigned long soft_lockup_nmi_warn; -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -#ifdef CONFIG_HARDLOCKUP_DETECTOR -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in 
all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). - */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); -#endif - unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -244,30 +202,12 @@ void touch_all_softlockup_watchdogs(void) wq_watchdog_touch(-1); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR -void touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. 
-*/ - raw_cpu_write(watchdog_nmi_touch, true); - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -#endif - void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR /* watchdog detector functions */ bool is_hardlockup(void) { @@ -279,7 +219,6 @@ bool is_hardlockup(void) __this_cpu_write(hrtimer_interrupts_saved, hrint); return false; } -#endif static int is_softlockup(unsigned long touch_ts) { @@ -293,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR - -static struct perf_event_attr wd_hw_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .size = sizeof(struct perf_event_attr), - .pinned = 1, - .disabled = 1, -}; - -/* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, -struct perf_sample_data *data, -struct pt_regs *regs) -{ - /* Ensure the watchdog never gets throttled */
[PATCH v2 2/2] sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Implement functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable to enable/disable nmi watchdog. Sparc uses arch specific nmi watchdog handler. Currently, we do not have a way to enable/disable nmi watchdog dynamically. With these patches we can enable or disable arch specific nmi watchdogs using proc or sysctl interface. Example commands. To enable: echo 1 > /proc/sys/kernel/nmi_watchdog To disable: echo 0 > /proc/sys/kernel/nmi_watchdog It can also achieved using the sysctl parameter kernel.nmi_watchdog Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/nmi.c | 41 - 1 files changed, 40 insertions(+), 1 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a9973bb..d7e2c01 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -42,7 +42,7 @@ static int panic_on_timeout; */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); - +static int nmi_init_done; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; @@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { + if (!__this_cpu_read(wd_enabled)) + return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); __this_cpu_write(wd_enabled, 0); atomic_dec(_active); @@ -207,6 +209,9 @@ error: void start_nmi_watchdog(void *unused) { + if (__this_cpu_read(wd_enabled)) + return; + __this_cpu_write(wd_enabled, 1); atomic_inc(_active); @@ -259,6 +264,8 @@ int __init nmi_init(void) } } + nmi_init_done = 1; + return err; } @@ -270,3 +277,35 @@ static int __init setup_nmi_watchdog(char *str) return 0; } __setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * sparc specific NMI watchdog enable function. + * Enables watchdog if it is not enabled already. 
+ */ +int arch_watchdog_nmi_enable(unsigned int cpu) +{ + if (atomic_read(_active) == -1) { + pr_info_once("NMI watchdog cannot be enabled\n"); + return -1; + } + + /* +* watchdog thread could start even before nmi_init is called. +* Just Return in that case. Let nmi_init finish the init +* process first. +*/ + if (!nmi_init_done) + return 0; + + smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); + + return 0; +} +/* + * sparc specific NMI watchdog disable function. + * Disables watchdog if it is not disabled already. + */ +void arch_watchdog_nmi_disable(unsigned int cpu) +{ + smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); +} -- 1.7.1
[PATCH v2 0/2] Introduce arch specific nmi enable, disable handlers
During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. v2: a)Sam Ravnborg's comments about making the definitions visible. With the new approach we dont need those definitions((NMI_WATCHDOG_ENABLED, SOFT_WATCHDOG_ENABLED etc..) outside watchdog.c. So no action. b) Made changes per Don Zickus comments. Don, I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. I feel this should have very less impact on the races you are concerned about. Please take a look. Feel free to suggest. Patch2 changes: I had to introduce new variable nmi_init_done to synchronize watchdog thread and kernel init thread. v1: Initial version. Discussion thread here http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1245427.html Babu Moger (2): watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable sparc: Implement arch_watchdog_nmi_enable and arch_watchdog_nmi_disable arch/sparc/kernel/nmi.c | 41 +- kernel/watchdog.c | 65 +++--- 2 files changed, 84 insertions(+), 22 deletions(-)
Re: [PATCH 0/2] Introduce update_arch_nmi_watchdog for arch specific handlers
On 10/7/2016 10:51 AM, Don Zickus wrote: On Thu, Oct 06, 2016 at 03:16:41PM -0700, Babu Moger wrote: During our testing we noticed that nmi watchdogs in sparc could not be disabled or enabled dynamically using sysctl/proc interface. Sparc uses its own arch specific nmi watchdogs. There is a sysctl and proc interface(proc/sys/kernel/nmi_watchdog) to enable/disable nmi watchdogs. However, that is not working for sparc. There is no interface to feed this parameter to arch specific nmi watchdogs. These patches extend the same sysctl/proc interface to enable or disable these arch specific nmi watchdogs dynamically. Introduced new function update_arch_nmi_watchdog which can be implemented in arch specific handlers. If you think there is a better way to do this. Please advice. Tested on sparc. Compile tested on x86. Hi Babu, Thanks for the patch. Yeah, I don't test sparc at all (lack of hardware). Sorry about that. We did spend quite a bit of time trying to get various soft/hard lockup logic going for the /proc stuff and I am wondering if your patches are to simple and expose some of the races we tried to fix. Therefore I am wondering if we could re-use some of our logic for your case. The perf stuff is really the x86 equivalent of arch_watchdog_enable. I am wondering if we break that out as a __weak default function and then have sparc override it with its own enable/disable functions. Something along the lines below (compiled on x86 but untested)? Hi Don, Sorry for the late response. I ran into issues with the setups and new approach. I could not use your patches as is. Reason is sparc does not define CONFIG_HARDLOCKUP_DETECTOR. So, defining default __weak function did not work for me. However, I have used your idea to define __weak functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable when CONFIG_HARDLOCKUP_DETECTOR is not defined. Sending v2 version now. Please take a look. Thanks for your inputs. 
Cheers, Don diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..55cd2d3 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. -*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -658,7 +642,36 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. 
+*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + return err; + } + + return 0; +} + +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -675,6 +688,11 @@ static void watchdog_nmi_disable(unsigned int cpu) } } +static void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; }
[PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and arch_watchdog_nmi_disable
Currently we do not have a way to enable/disable arch specific watchdog handlers if it was implemented by any of the architectures. This patch introduces new functions arch_watchdog_nmi_enable and arch_watchdog_nmi_disable which can be used to enable/disable architecture specific NMI watchdog handlers. These functions are defined as weak as architectures can override their definitions to enable/disable nmi watchdog behaviour. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- kernel/watchdog.c | 65 +++- 1 files changed, 44 insertions(+), 21 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f..d1e84e6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(watchdog_proc_mutex); -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(unsigned int cpu) +static int arch_watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* -* Disable the hard lockup detector if _any_ CPU fails to set up -* set up the hardware perf event. The watchdog() function checks -* the NMI_WATCHDOG_ENABLED bit periodically. -* -* The barriers are for syncing up watchdog_enabled across all the -* cpus, as clear_bit() does not use barriers. 
-*/ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) return PTR_ERR(event); @@ -658,7 +642,7 @@ out: return 0; } -static void watchdog_nmi_disable(unsigned int cpu) +static void arch_watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -676,8 +660,13 @@ static void watchdog_nmi_disable(unsigned int cpu) } #else -static int watchdog_nmi_enable(unsigned int cpu) { return 0; } -static void watchdog_nmi_disable(unsigned int cpu) { return; } +/* + * These two functions are mostly architecture specific + * defining them as weak here. + */ +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; } +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; } + #endif /* CONFIG_HARDLOCKUP_DETECTOR */ static struct smp_hotplug_thread watchdog_threads = { @@ -781,6 +770,40 @@ void lockup_detector_resume(void) put_online_cpus(); } +void watchdog_nmi_disable(unsigned int cpu) +{ + arch_watchdog_nmi_disable(cpu); +} + +int watchdog_nmi_enable(unsigned int cpu) +{ + int err; + + /* nothing to do if the hard lockup detector is disabled */ + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + err = arch_watchdog_nmi_enable(cpu); + + if (err) { + /* +* Disable the hard lockup detector if _any_ CPU fails to set up +* set up the hardware perf event. The watchdog() function checks +* the NMI_WATCHDOG_ENABLED bit periodically. +* +* The barriers are for syncing up watchdog_enabled across all the +* cpus, as clear_bit() does not use barriers. +*/ + smp_mb__before_atomic(); + clear_bit(NMI_WATCHDOG_ENABLED_BIT, _enabled); + smp_mb__after_atomic(); + + return err; + } + + return 0; +} + static int update_watchdog_all_cpus(void) { int ret; -- 1.7.1
[PATCH v2] arch/sparc: Avoid DCTI Couples
Avoid unintended DCTI Couples. Use of DCTI couples is deprecated. Also address the "Programming Note" for optimal performance. Here is the complete text from Oracle SPARC Architecture Specs. 6.3.4.7 DCTI Couples "A delayed control transfer instruction (DCTI) in the delay slot of another DCTI is referred to as a “DCTI couple”. The use of DCTI couples is deprecated in the Oracle SPARC Architecture; no new software should place a DCTI in the delay slot of another DCTI, because on future Oracle SPARC Architecture implementations DCTI couples may execute either slowly or differently than the programmer assumes it will. SPARC V8 and SPARC V9 Compatibility Note The SPARC V8 architecture left behavior undefined for a DCTI couple. The SPARC V9 architecture defined behavior in that case, but as of UltraSPARC Architecture 2005, use of DCTI couples was deprecated. Software should not expect high performance from DCTI couples, and performance of DCTI couples should be expected to decline further in future processors. Programming Note As noted in TABLE 6-5 on page 115, an annulled branch-always (branch-always with a = 1) instruction is not architecturally a DCTI. However, since not all implementations make that distinction, for optimal performance, a DCTI should not be placed in the instruction word immediately following an annulled branch-always instruction (BA,A or BPA,A)." 
Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Rob Gardner <rob.gard...@oracle.com> --- arch/sparc/kernel/head_64.S|4 arch/sparc/kernel/misctrap.S |1 + arch/sparc/kernel/rtrap_64.S |1 + arch/sparc/kernel/spiterrs.S |1 + arch/sparc/kernel/sun4v_tlb_miss.S |1 + arch/sparc/kernel/urtt_fill.S |1 + arch/sparc/kernel/winfixup.S |2 ++ arch/sparc/lib/NG2memcpy.S |4 arch/sparc/lib/NG4memcpy.S |1 + arch/sparc/lib/NG4memset.S |1 + arch/sparc/lib/NGmemcpy.S |1 + 11 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da1..4410119 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -96,6 +96,7 @@ sparc64_boot: andn%g1, PSTATE_AM, %g1 wrpr%g1, 0x0, %pstate ba,a,pt %xcc, 1f +nop .globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_getprop_name, prom_mmu_name, prom_peer_name @@ -613,6 +614,7 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop @@ -622,6 +624,7 @@ niagara4_patch: nop ba,a,pt %xcc, 80f +nop niagara2_patch: callniagara2_patch_copyops @@ -632,6 +635,7 @@ niagara2_patch: nop ba,a,pt %xcc, 80f +nop niagara_patch: callniagara_patch_copyops diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933..9276d2f 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S @@ -82,6 +82,7 @@ do_stdfmna: callhandle_stdfmna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948c..709a82e 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -237,6 +237,7 @@ rt_continue:ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 bne,pt %xcc, user_rtt_fill_32bit wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit +nop user_rtt_fill_fixup_dax: ba,pt %xcc, user_rtt_fill_fixup_common diff --git a/arch/sparc/kernel/spiterrs.S 
b/arch/sparc/kernel/spiterrs.S index 4a73009..d7e5408 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: rd %pc, %g7 ba,a,pt %xcc, 2f +nop 1: ba,pt %xcc, etrap_irq rd %pc, %g7 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19..c19f352 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -352,6 +352,7 @@ sun4v_mna: callsun4v_do_mna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop /* Privileged Action. */ sun4v_privact: diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b..364af32 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: cal
[PATCH] arch/sparc: Avoid DCTI Couples
Avoid un-intended DCTI Couples. Use of DCTI couples is deprecated. Refer UltraSPARC Architecture 2005(Section 6.3.4.7 - DCTI Couples). http://www.oracle.com/technetwork/systems/opensparc/1537734 "A delayed control transfer instruction (DCTI) in the delay slot of another DCTI is referred to as a DCTI couple. The use of DCTI couples is deprecated in the Oracle SPARC Architecture; no new software should place a DCTI in the delay slot of another DCTI, because on future Oracle SPARC Architecture implementations DCTI couples may execute either slowly or differently than the programmer assumes it will." Signed-off-by: Babu Moger <babu.mo...@oracle.com> Reviewed-by: Rob Gardner <rob.gard...@oracle.com> --- arch/sparc/kernel/head_64.S|4 arch/sparc/kernel/misctrap.S |1 + arch/sparc/kernel/rtrap_64.S |1 + arch/sparc/kernel/spiterrs.S |1 + arch/sparc/kernel/sun4v_tlb_miss.S |1 + arch/sparc/kernel/urtt_fill.S |1 + arch/sparc/kernel/winfixup.S |2 ++ arch/sparc/lib/NG2memcpy.S |4 arch/sparc/lib/NG4memcpy.S |1 + arch/sparc/lib/NG4memset.S |1 + arch/sparc/lib/NGmemcpy.S |1 + 11 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da1..4410119 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -96,6 +96,7 @@ sparc64_boot: andn%g1, PSTATE_AM, %g1 wrpr%g1, 0x0, %pstate ba,a,pt %xcc, 1f +nop .globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_getprop_name, prom_mmu_name, prom_peer_name @@ -613,6 +614,7 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop @@ -622,6 +624,7 @@ niagara4_patch: nop ba,a,pt %xcc, 80f +nop niagara2_patch: callniagara2_patch_copyops @@ -632,6 +635,7 @@ niagara2_patch: nop ba,a,pt %xcc, 80f +nop niagara_patch: callniagara_patch_copyops diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933..9276d2f 100644 --- a/arch/sparc/kernel/misctrap.S +++ 
b/arch/sparc/kernel/misctrap.S @@ -82,6 +82,7 @@ do_stdfmna: callhandle_stdfmna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948c..709a82e 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -237,6 +237,7 @@ rt_continue:ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 bne,pt %xcc, user_rtt_fill_32bit wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit +nop user_rtt_fill_fixup_dax: ba,pt %xcc, user_rtt_fill_fixup_common diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index 4a73009..d7e5408 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: rd %pc, %g7 ba,a,pt %xcc, 2f +nop 1: ba,pt %xcc, etrap_irq rd %pc, %g7 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19..c19f352 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -352,6 +352,7 @@ sun4v_mna: callsun4v_do_mna add%sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap +nop /* Privileged Action. 
*/ sun4v_privact: diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b..364af32 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: callsun4v_data_access_exception nop ba,a,pt %xcc, rtrap +nop 1: callspitfire_data_access_exception nop diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 855019a..1ee173c 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S @@ -152,6 +152,8 @@ fill_fixup_dax: callsun4v_data_access_exception nop ba,a,pt %xcc, rtrap +nop 1: callspitfire_data_access_exception nop ba,a,pt %xcc, rtrap +nop diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index c629dbd..64dcd6c 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -326,11 +326,13 @@ FUNC_NAME:/*
Re: [PATCH RFC 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7
David, Thanks for the comments. I am working on addressing your feedback. Comments inline below. On 7/29/2017 4:36 PM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Thu, 27 Jul 2017 15:57:29 -0600 @@ -600,7 +600,7 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch This part will need to be respun now that the M8 patches are in as there will be a slight conflict in this hunk. Actually, these patches have been tested both on M7 and M8. I wanted to add M8 also. But M8 patches were not in the kernel yet. Now that these M8 patches(from Allen) are in the kernel, I can add it now. Will update it in the second version. +.register %g2,#scratch + + .section".text" + .global FUNC_NAME + .type FUNC_NAME, #function + .align 16 +FUNC_NAME: + srlx%o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, %g1! save %o0 + brz,pn %o2, .Lsmallx + + cmp%o2, 3 +ble,pn %icc, .Ltiny_cp + cmp%o2, 19 +ble,pn %icc, .Lsmall_cp + or %o0, %o1, %g2 +cmp %o2, SMALL_MAX +bl,pn %icc, .Lmedium_cp + nop What in world is going on with this indentation? I can't comprehend how, if anyone actually put their eyes on this code and the patch itself, wouldn't notice this. DO NOT mix all-spaced and TAB+space indentation. Always, consistently, use as many TABs as you can and then when needed add trailing spaces. Sure. Will address these problems. In general will address all the format issues. thanks +.Lsrc_dst_aligned_on_8: + ! check if we are copying MED_MAX or more bytes +set MED_MAX, %o3 +cmp %o2, %o3 ! limit to store buffer size + bgu,pn %ncc, .Llarge_align8_copy +nop Again, same problem here. +/* + * Handle all cases where src and dest are aligned on word + * boundaries. Use unrolled loops for better performance. 
+ * This option wins over standard large data move when + * source and destination is in cache for.Lmedium + * to short data moves. + */ +set MED_WMAX, %o3 +cmp %o2, %o3 ! limit to store buffer size + bge,pt %ncc, .Lunalignrejoin ! otherwise rejoin main loop +nop More weird indentation. +.dbalign: +andcc %o5, 7, %o3 ! is sp1 aligned on a 8 byte bound? +bz,pt %ncc, .blkalign ! already long word aligned + sub %o3, 8, %o3 ! -(bytes till long word aligned) + +add %o2, %o3, %o2 ! update o2 with new count +! Set -(%o3) bytes till sp1 long word aligned +1: stb %o1, [%o5] ! there is at least 1 byte to set + inccc %o3 ! byte clearing loop +bl,pt %ncc, 1b +inc %o5 More weird indentation. +! Now sp1 is block aligned +.blkwr: +andn%o2, 63, %o4! calculate size of blocks in bytes +brz,pn %o1, .wrzero! special case if c == 0 + and %o2, 63, %o3! %o3 = bytes left after blk stores. + +set MIN_LOOP, %g1 +cmp %o4, %g1! check there are enough bytes to set + blu,pn %ncc, .short_set! to justify cost of membar +! must be > pre-cleared lines + nop Likewise. + +! initial cache-clearing stores +! get store pipeline moving + rd %asi, %g3 ! save %asi to be restored later +wr %g0, ASI_STBIMRU_P, %asi Likewise. +.wrzero_small: +stxa%o1, [%o5]ASI_STBI_P +subcc %o4, 64, %o4 +bgu,pt %ncc, .wrzero_small + add %o5, 64, %o5 + ba,a.bsi_done Likewise. +.asi_done: + wr %g3, 0x0, %asi ! restored saved %asi +.bsi_done: +membar #StoreStore ! required by use of Block Store Init Likewise. + .size M7memset,.-M7memset It's usually a lot better to use ENTRY() and ENDPROC() instead of expanding these kinds of directives out. Ok. Sure. Will address it. + .globl m7_patch_copyops + .type m7_patch_copyops,#function +m7_patch_copyops: ENTRY() Sure. + .size m7_patch_copyops,.-m7_patch_copyops ENDPROC() Sure + .globl m7_patch_bzero + .type m7_patch_bzero,#function +m7_patch_bzero: Likewise. Ok + .size m7_patch_bzero,.-m7_patch_bzero Likewise. Ok + .globl m7_patch_pageops + .type m7_patch_pageops,#function +m7_patch_page
Re: [PATCH v2 0/4] Update memcpy, memset etc. for M7/M8 architectures
David, Thanks for applying. On 8/10/2017 4:38 PM, David Miller wrote: From: Babu Moger <babu.mo...@oracle.com> Date: Mon, 7 Aug 2017 17:52:48 -0600 This series of patches updates the memcpy, memset, copy_to_user, copy_from_user etc for SPARC M7/M8 architecture. This doesn't build, you cannot assume the existence of "%ncc", it is a recent addition. Furthermore there is no need to ever use %ncc in v9 targetted code anyways. I'll fix that up, but this was a really disappointing build failure to hit. Thank you.. Meanwhile, two questions: 1) Is this also faster on T4 as well? If it is, we can just get rid of the T4 routines and use this on those chips as well. At the time of this work, our focus was mostly on T7 and T8. We did not test this code on T4. For T4 and other older configs we used NG4 versions. I would think it would require some changes to make it work on T4. 2) There has been a lot of discussion and consideration put into how a memcpy/memset routine might be really great for the local cpu but overall pessimize performance for other cpus either locally on the same core (contention for physical resources such as ports to the store buffer and/or L3 cache) or on other cores. Has any such study been done into these issues wrt. this new code? No, we have not done this kind of study.
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/4/2017 4:09 AM, Geert Uytterhoeven wrote: Hi Greg, On Tue, Jul 4, 2017 at 10:04 AM, Greg KHwrote: On Tue, Jul 04, 2017 at 09:15:55AM +0200, Geert Uytterhoeven wrote: On Mon, Jul 3, 2017 at 4:58 PM, Greg KH wrote: USB/PHY patches for 4.13-rc1 Heikki Krogerus (3): usb: typec: Add support for UCSI interface Commit c1b0bc2dabfa884d ("usb: typec: Add support for UCSI interface"): --- /dev/null +++ b/drivers/usb/typec/ucsi/Kconfig @@ -0,0 +1,23 @@ +config TYPEC_UCSI + tristate "USB Type-C Connector System Software Interface driver" + depends on !CPU_BIG_ENDIAN To work as expected, and prevent this driver from being enabled on big endian systems, this depends on "[PATCH v3 0/3] Define CPU_BIG_ENDIAN or warn for inconsistencies". https://lkml.org/lkml/2017/6/12/1068 Is this a problem? I have no idea what happens if you enable the driver on big endian. I thought that series was slated to be merged soon, is that not going to happen? Me too. But it's not in next-20170704. Babu, what's the plan? Yes. I think these series are safe to be merged. Max, Do you have any concerns about xtensa? Thanks! Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge?
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 9:33 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 09:28:06AM -0500, Babu Moger wrote: On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge. What series? What exactly are you referring to here? totally confused... Greg, Sorry for not making it clear. I am referring to the following series. https://patchwork.kernel.org/patch/9782851/ https://patchwork.kernel.org/patch/9782843/ https://patchwork.kernel.org/patch/9782847/ greg k-h
Re: [GIT PULL] USB/PHY patches for 4.13-rc1
On 7/6/2017 10:51 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 10:28:03AM -0500, Babu Moger wrote: On 7/6/2017 9:33 AM, Greg KH wrote: On Thu, Jul 06, 2017 at 09:28:06AM -0500, Babu Moger wrote: On 7/6/2017 3:24 AM, Max Filippov wrote: Hi Babu, On Tue, Jul 4, 2017 at 10:19 AM, Babu Moger <babu.mo...@oracle.com> wrote: Max, Do you have any concerns about xtensa? no, not ATM. I still haven't got a chance to look closer at moving endianness macros back to Kconfig for xtensa. Thanks. Greg, Can you please stage this series for the next merge. What series? What exactly are you referring to here? totally confused... Greg, Sorry for not making it clear. I am referring to this following series. https://patchwork.kernel.org/patch/9782851/ https://patchwork.kernel.org/patch/9782843/ https://patchwork.kernel.org/patch/9782847/ Can you resend them please so I can take them from email? Sure. Will send it in a bit. thanks thanks, greg k-h
[PATCH v3 1/3] arch: Define CPU_BIG_ENDIAN for all fixed big endian archs
While working on enabling queued rwlock on SPARC, found this following code in include/asm-generic/qrwlock.h which uses CONFIG_CPU_BIG_ENDIAN to clear a byte. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Problem is many of the fixed big endian architectures don't define CPU_BIG_ENDIAN and clears the wrong byte. Define CPU_BIG_ENDIAN for all the fixed big endian architecture to fix it. Also found few more references of this config parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Be aware that this may cause regressions if someone has worked-around problems in the above code already. Remove the work-around. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> Acked-by: David S. Miller <da...@davemloft.net> Acked-by: Stafford Horne <sho...@gmail.com> --- arch/frv/Kconfig |3 +++ arch/h8300/Kconfig|3 +++ arch/m68k/Kconfig |3 +++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig|3 +++ 6 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index eefd9a4..1cce824 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -17,6 +17,9 @@ config FRV select HAVE_DEBUG_STACKOVERFLOW select ARCH_NO_COHERENT_DMA_MMAP +config CPU_BIG_ENDIAN + def_bool y + config ZONE_DMA bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3ae8525..5380ac8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -23,6 +23,9 @@ config H8300 select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d140206..029a58b 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,6 +23,9 @@ 
config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1e95920..a0f2e4a 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -29,6 +29,9 @@ config OPENRISC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9e..dda1f55 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 908f019..0d9dc49 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 -- 1.7.1
[PATCH v3 3/3] include: warn for inconsistent endian config definition
We have seen some generic code use config parameter CONFIG_CPU_BIG_ENDIAN to decide the endianness. Here are the few examples. include/asm-generic/qrwlock.h drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Display warning if CPU_BIG_ENDIAN is not defined on big endian architecture and also warn if it defined on little endian architectures. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> --- include/linux/byteorder/big_endian.h|4 include/linux/byteorder/little_endian.h |4 2 files changed, 8 insertions(+), 0 deletions(-) diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 3920414..ffd2159 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -3,5 +3,9 @@ #include +#ifndef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN +#endif + #include #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 0805737..ba910bb 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -3,5 +3,9 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, CONFIG_CPU_BIG_ENDIAN is set +#endif + #include #endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */ -- 1.7.1
[PATCH v3 0/3] Define CPU_BIG_ENDIAN or warn for inconsistencies
Resending the series per Greg KH's request. Found this problem while enabling queued rwlock on SPARC. The parameter CONFIG_CPU_BIG_ENDIAN is used to clear the specific byte in qrwlock structure. Without this parameter, we clear the wrong byte. Here is the code in include/asm-generic/qrwlock.h static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } Also found few more references of this parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Here is our previous discussion. https://lkml.org/lkml/2017/5/24/620 Based on the discussion, it was decided to add CONFIG_CPU_BIG_ENDIAN for all the fixed big endian architecture(frv, h8300, m68k, openrisc, parisc and sparc). And warn if there are inconsistencies in this definition. v2 -> v3: Added the choice statement for endianness selection for microblaze. Updated the Makefile for microblaze(Suggested by Arnd Bergmann) to properly compile for the correct format. Updated acks. v1 -> v2: Updated the commit messages and acks. Babu Moger (3): arch: Define CPU_BIG_ENDIAN for all fixed big endian archs arch/microblaze: Add choice for endianness and update Makefile include: warn for inconsistent endian config definition arch/frv/Kconfig|3 +++ arch/h8300/Kconfig |3 +++ arch/m68k/Kconfig |3 +++ arch/microblaze/Kconfig | 16 arch/microblaze/Makefile|2 ++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig |3 +++ include/linux/byteorder/big_endian.h|4 include/linux/byteorder/little_endian.h |4 10 files changed, 44 insertions(+), 0 deletions(-)
[PATCH v3 2/3] arch/microblaze: Add choice for endianness and update Makefile
microblaze architectures can be configured for either little or big endian formats. Add a choice option for the user to select the correct endian format(default to big endian). Also update the Makefile so toolchain can compile for the format it is configured for. Signed-off-by: Babu Moger <babu.mo...@oracle.com> Signed-off-by: Arnd Bergmann <a...@arndb.de> --- arch/microblaze/Kconfig | 16 arch/microblaze/Makefile |2 ++ 2 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 85885a5..74aa5de 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -35,6 +35,22 @@ config MICROBLAZE select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS +# Endianness selection +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + microblaze architectures can be configured for either little or + big endian formats. Be sure to select the appropriate mode. + +config CPU_BIG_ENDIAN + bool "Big endian" + +config CPU_LITTLE_ENDIAN + bool "Little endian" + +endchoice + config SWAP def_bool n diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 740f2b8..1f6c486 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -35,6 +35,8 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare +CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian +CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) -- 1.7.1
[PATCH v2 1/4] arch/sparc: Separate the exception handlers from NG4memcpy
Separate the exception handlers from NG4memcpy so that it can be used with new memcpy routines. Make a separate file for all these handlers. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Makefile |2 + arch/sparc/lib/Memcpy_utils.S | 163 + arch/sparc/lib/NG4memcpy.S| 149 - 3 files changed, 165 insertions(+), 149 deletions(-) create mode 100644 arch/sparc/lib/Memcpy_utils.S diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..37930c0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -36,6 +36,8 @@ lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o +lib-$(CONFIG_SPARC64) += Memcpy_utils.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S new file mode 100644 index 000..f7a26e0 --- /dev/null +++ b/arch/sparc/lib/Memcpy_utils.S @@ -0,0 +1,163 @@ +#ifndef __ASM_MEMCPY_UTILS +#define __ASM_MEMCPY_UTILS + +#include +#include +#include + +ENTRY(__restore_asi_fp) + VISExitHalf + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi_fp) + +ENTRY(__restore_asi) + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi) + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi +mov%o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi +add%o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi +add%o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi +add%o2, 
%o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi 
+add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt
[PATCH v2 0/4] Update memcpy, memset etc. for M7/M8 architectures
This series of patches updates the memcpy, memset, copy_to_user, copy_from_user etc for SPARC M7/M8 architecture. New algorithm here takes advantage of the M7/M8 block init store ASIs, with much more optimized way to improve the performance. More detail are in code comments. Tested and compared the latency measured in ticks(NG4memcpy vs new M7memcpy). 1. Memset numbers(Aligned memset) No.of bytes NG4memsetM7memset Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 77 25 -67.53 7 43 33 -23.25 3272 68 -5.55 128 164 44 -73.17 256 335 68 -79.70 512 511 220 -56.94 1024 1552627 -59.60 2048 35151322-62.38 4096 63032472-60.78 8192 13118 4867-62.89 16384 26206 10371 -60.42 32768 52501 18569 -64.63 65536 100219 35899 -64.17 2. Memcpy numbers(Aligned memcpy) No.of bytes NG4memcpyM7memcpy Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 20 19 -5 7 29 27 -6.89 3230 28 -6.66 128 89 69 -22.47 256 142 143 0.70 512 341 283 -17.00 1024 1588655 -58.75 2048 35531357-61.80 4096 72182590-64.11 8192 13701 5231-61.82 16384 28304 10716 -62.13 32768 56516 22995 -59.31 65536 115443 50840 -55.96 3. Memset numbers(un-aligned memset) No.of bytes NG4memsetM7memset Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 40 31 -22.5 7 52 29 -44.2307692308 3289 86 -3.3707865169 128 201 74 -63.184079602 256 340 154 -54.7058823529 512 961 335 -65.1404786681 1024 1799686 -61.8677042802 2048 35751260-64.7552447552 4096 65602627-59.9542682927 8192 13161 6018-54.273991338 16384 26465 10439 -60.5554505951 32768 52119 18649 -64.2184232238 65536 101593 35724 -64.8361599717 4. 
Memcpy numbers(un-aligned memcpy) No.of bytes NG4memcpyM7memcpy Delta ((B-A)/A)*100 (Avg.Ticks A) (Avg.Ticks B) (latency reduction) 3 26 19 -26.9230769231 7 48 45 -6.25 3252 49 -5.7692307692 128 284 334 17.6056338028 256 430 482 12.0930232558 512 646 690 6.8111455108 1024 10511016-3.3301617507 2048 178718181.7347509793 4096 330933762.0247809006 8192 81517444-8.673782358 16384 34222 34556 0.9759803635 32768 87851 95044 8.1877269468 65536 158331 159572 0.7838010244 There is not much difference in numbers with Un-aligned copies between NG4memcpy and M7memcpy because they both mostly use the same algorithems. v2: 1. Fixed indentation issues found by David Miller 2. Used ENTRY and ENDPROC for the labels in M7patch.S as suggested by David Miller 3. Now M8 also will use M7memcpy. Also tested on M8 config. 4. These patches are created on top of below M8 patches https://patchwork.ozlabs.org/patch/792661/ https://patchwork.ozlabs.org/patch/792662/ However, I did not see these patches in sparc-next tree. It may be in queue now. It is possible these patches might cause some build problems. It will resolve once all M8 patches are in sparc-next tree. v0: Initial version Babu Moger (4): arch/sparc: Separate the exception handlers from NG4memcpy arch/sparc: Rename exception handlers arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8 arch/sparc: Add accurate exception reporting
[PATCH v2 2/4] arch/sparc: Rename exception handlers
Rename exception handlers to memcpy_xxx as these are going to be used by new memcpy routines and these handlers are not exclusive to NG4memcpy anymore. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Memcpy_utils.S | 120 +++--- arch/sparc/lib/NG4memcpy.S| 128 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index f7a26e0..bcc5d77 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -16,148 +16,148 @@ ENTRY(__restore_asi) wr %g0, ASI_AIUS, %asi ENDPROC(__restore_asi) -ENTRY(NG4_retl_o2) +ENTRY(memcpy_retl_o2) ba,pt %xcc, __restore_asi mov%o2, %o0 -ENDPROC(NG4_retl_o2) -ENTRY(NG4_retl_o2_plus_1) +ENDPROC(memcpy_retl_o2) +ENTRY(memcpy_retl_o2_plus_1) ba,pt %xcc, __restore_asi add%o2, 1, %o0 -ENDPROC(NG4_retl_o2_plus_1) -ENTRY(NG4_retl_o2_plus_4) +ENDPROC(memcpy_retl_o2_plus_1) +ENTRY(memcpy_retl_o2_plus_4) ba,pt %xcc, __restore_asi add%o2, 4, %o0 -ENDPROC(NG4_retl_o2_plus_4) -ENTRY(NG4_retl_o2_plus_o5) +ENDPROC(memcpy_retl_o2_plus_4) +ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5) -ENTRY(NG4_retl_o2_plus_o5_plus_4) +ENDPROC(memcpy_retl_o2_plus_o5) +ENTRY(memcpy_retl_o2_plus_o5_plus_4) add %o5, 4, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_4) -ENTRY(NG4_retl_o2_plus_o5_plus_8) +ENDPROC(memcpy_retl_o2_plus_o5_plus_4) +ENTRY(memcpy_retl_o2_plus_o5_plus_8) add %o5, 8, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_8) -ENTRY(NG4_retl_o2_plus_o5_plus_16) +ENDPROC(memcpy_retl_o2_plus_o5_plus_8) +ENTRY(memcpy_retl_o2_plus_o5_plus_16) add %o5, 16, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_16) -ENTRY(NG4_retl_o2_plus_o5_plus_24) +ENDPROC(memcpy_retl_o2_plus_o5_plus_16) +ENTRY(memcpy_retl_o2_plus_o5_plus_24) add %o5, 24, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 
-ENDPROC(NG4_retl_o2_plus_o5_plus_24) -ENTRY(NG4_retl_o2_plus_o5_plus_32) +ENDPROC(memcpy_retl_o2_plus_o5_plus_24) +ENTRY(memcpy_retl_o2_plus_o5_plus_32) add %o5, 32, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_32) -ENTRY(NG4_retl_o2_plus_g1) +ENDPROC(memcpy_retl_o2_plus_o5_plus_32) +ENTRY(memcpy_retl_o2_plus_g1) ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1) -ENTRY(NG4_retl_o2_plus_g1_plus_1) +ENDPROC(memcpy_retl_o2_plus_g1) +ENTRY(memcpy_retl_o2_plus_g1_plus_1) add %g1, 1, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_1) -ENTRY(NG4_retl_o2_plus_g1_plus_8) +ENDPROC(memcpy_retl_o2_plus_g1_plus_1) +ENTRY(memcpy_retl_o2_plus_g1_plus_8) add %g1, 8, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_8) -ENTRY(NG4_retl_o2_plus_o4) +ENDPROC(memcpy_retl_o2_plus_g1_plus_8) +ENTRY(memcpy_retl_o2_plus_o4) ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4) -ENTRY(NG4_retl_o2_plus_o4_plus_8) +ENDPROC(memcpy_retl_o2_plus_o4) +ENTRY(memcpy_retl_o2_plus_o4_plus_8) add %o4, 8, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_8) -ENTRY(NG4_retl_o2_plus_o4_plus_16) +ENDPROC(memcpy_retl_o2_plus_o4_plus_8) +ENTRY(memcpy_retl_o2_plus_o4_plus_16) add %o4, 16, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_16) -ENTRY(NG4_retl_o2_plus_o4_plus_24) +ENDPROC(memcpy_retl_o2_plus_o4_plus_16) +ENTRY(memcpy_retl_o2_plus_o4_plus_24) add %o4, 24, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_24) -ENTRY(NG4_retl_o2_plus_o4_plus_32) +ENDPROC(memcpy_retl_o2_plus_o4_plus_24) +ENTRY(memcpy_retl_o2_plus_o4_plus_32) add %o4, 32, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_32) -ENTRY(NG4_retl_o2_plus_o4_plus_40) +ENDPROC(memcpy_retl_o2_plus_o4_plus_32) +ENTRY(memcpy_retl_o2_plus_o4_plus_40) add %o4, 40, %o4 ba,pt %xcc, 
__restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_40) -ENTRY(NG4_retl_o2_plus_o4_plus_48) +ENDPROC(memcpy_retl_o2_plus_o4_plus_40) +ENTRY(memcpy_retl_o2_plus_o4_plus_48) add %o4, 48, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_48) -ENTRY(NG4_retl_o2_plus_o4_plus_56) +E
[PATCH v2 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8
New algorithm that takes advantage of the M7/M8 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/head_64.S | 16 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 923 + arch/sparc/lib/M7memset.S | 352 ++ arch/sparc/lib/M7patch.S | 51 ++ arch/sparc/lib/Makefile |3 + 7 files changed, 1435 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..bf9a5ac 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -603,10 +603,10 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_M8 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -621,6 +621,18 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop + +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop + niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include &q
[PATCH v2 4/4] arch/sparc: Add accurate exception reporting in M7memcpy
Add accurate exception reporting in M7memcpy Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/M7copy_from_user.S | 11 +- arch/sparc/lib/M7copy_to_user.S | 10 +- arch/sparc/lib/M7memcpy.S | 396 ++-- arch/sparc/lib/Memcpy_utils.S | 182 + 4 files changed, 390 insertions(+), 209 deletions(-) diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S index d0689d7..66464b3 100644 --- a/arch/sparc/lib/M7copy_from_user.S +++ b/arch/sparc/lib/M7copy_from_user.S @@ -5,23 +5,22 @@ */ -#define EX_LD(x) \ +#define EX_LD(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x)\ +#define EX_LD_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; - #ifndef ASI_AIUS #define ASI_AIUS 0x11 #endif @@ -35,7 +34,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S index d3be132..a60ac46 100644 --- a/arch/sparc/lib/M7copy_to_user.S +++ b/arch/sparc/lib/M7copy_to_user.S @@ -5,19 +5,19 @@ */ -#define EX_ST(x) \ +#define EX_ST(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x)\ +#define EX_ST_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -45,7 +45,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index 0a0421d..d016fc2 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -96,17 +96,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x)x +#define EX_LD_FP(x,y) x 
#endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x)x +#define EX_ST_FP(x,y) x #endif #ifndef EX_RETVAL @@ -206,9 +206,9 @@ FUNC_NAME: sub %o1, %o0, %o1 ! %o1 gets the difference 7: ! dst aligning loop add %o1, %o0, %o4 - EX_LD(LOAD(ldub, %o4, %o4)) ! load one byte + EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte subcc %o5, 1, %o5 - EX_ST(STORE(stb, %o4, %o0)) + EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1) bgu,pt %ncc, 7b add%o0, 1, %o0 ! advance dst add %o1, %o0, %o1 ! restore %o1 @@ -233,64 +233,64 @@ FUNC_NAME: ble,pn %ncc, .Lmedl63 ! skip big loop if less than 64 bytes nop .Lmedl64: - EX_LD(LOAD(ldx, %o1, %o4)) ! load + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load subcc %o2, 64, %o2! decrement length count - EX_ST(STORE(stx, %o4, %o0)) ! and store - EX_LD(LOAD(ldx, %o1+8, %o3))! a block of 64 bytes - EX_ST(STORE(stx, %o3, %o0+8)) - EX_LD(LOAD(ldx, %o1+16, %o4)) - EX_ST(STORE(stx, %o4, %o0+16)) - EX_LD(LOAD(ldx, %o1+24, %o3)) - EX_ST(STORE(stx, %o3, %o0+24)) - EX_LD(LOAD(ldx, %o1+32, %o4)) ! load - EX_ST(STORE(stx, %o4, %o0+32)) ! and store - EX_LD(LOAD(ldx, %o1+40, %o3)) ! a block of 64 bytes + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56) + EX_LD(LOAD
[PATCH RFC 2/4] arch/sparc: Rename exception handlers
Rename exception handlers to memcpy_xxx as these are going to be used by new memcpy routines and these handlers are not exclusive to NG4memcpy anymore. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Memcpy_utils.S | 120 +++--- arch/sparc/lib/NG4memcpy.S| 128 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S index f7a26e0..bcc5d77 100644 --- a/arch/sparc/lib/Memcpy_utils.S +++ b/arch/sparc/lib/Memcpy_utils.S @@ -16,148 +16,148 @@ ENTRY(__restore_asi) wr %g0, ASI_AIUS, %asi ENDPROC(__restore_asi) -ENTRY(NG4_retl_o2) +ENTRY(memcpy_retl_o2) ba,pt %xcc, __restore_asi mov%o2, %o0 -ENDPROC(NG4_retl_o2) -ENTRY(NG4_retl_o2_plus_1) +ENDPROC(memcpy_retl_o2) +ENTRY(memcpy_retl_o2_plus_1) ba,pt %xcc, __restore_asi add%o2, 1, %o0 -ENDPROC(NG4_retl_o2_plus_1) -ENTRY(NG4_retl_o2_plus_4) +ENDPROC(memcpy_retl_o2_plus_1) +ENTRY(memcpy_retl_o2_plus_4) ba,pt %xcc, __restore_asi add%o2, 4, %o0 -ENDPROC(NG4_retl_o2_plus_4) -ENTRY(NG4_retl_o2_plus_o5) +ENDPROC(memcpy_retl_o2_plus_4) +ENTRY(memcpy_retl_o2_plus_o5) ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5) -ENTRY(NG4_retl_o2_plus_o5_plus_4) +ENDPROC(memcpy_retl_o2_plus_o5) +ENTRY(memcpy_retl_o2_plus_o5_plus_4) add %o5, 4, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_4) -ENTRY(NG4_retl_o2_plus_o5_plus_8) +ENDPROC(memcpy_retl_o2_plus_o5_plus_4) +ENTRY(memcpy_retl_o2_plus_o5_plus_8) add %o5, 8, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_8) -ENTRY(NG4_retl_o2_plus_o5_plus_16) +ENDPROC(memcpy_retl_o2_plus_o5_plus_8) +ENTRY(memcpy_retl_o2_plus_o5_plus_16) add %o5, 16, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_16) -ENTRY(NG4_retl_o2_plus_o5_plus_24) +ENDPROC(memcpy_retl_o2_plus_o5_plus_16) +ENTRY(memcpy_retl_o2_plus_o5_plus_24) add %o5, 24, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 
-ENDPROC(NG4_retl_o2_plus_o5_plus_24) -ENTRY(NG4_retl_o2_plus_o5_plus_32) +ENDPROC(memcpy_retl_o2_plus_o5_plus_24) +ENTRY(memcpy_retl_o2_plus_o5_plus_32) add %o5, 32, %o5 ba,pt %xcc, __restore_asi add%o2, %o5, %o0 -ENDPROC(NG4_retl_o2_plus_o5_plus_32) -ENTRY(NG4_retl_o2_plus_g1) +ENDPROC(memcpy_retl_o2_plus_o5_plus_32) +ENTRY(memcpy_retl_o2_plus_g1) ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1) -ENTRY(NG4_retl_o2_plus_g1_plus_1) +ENDPROC(memcpy_retl_o2_plus_g1) +ENTRY(memcpy_retl_o2_plus_g1_plus_1) add %g1, 1, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_1) -ENTRY(NG4_retl_o2_plus_g1_plus_8) +ENDPROC(memcpy_retl_o2_plus_g1_plus_1) +ENTRY(memcpy_retl_o2_plus_g1_plus_8) add %g1, 8, %g1 ba,pt %xcc, __restore_asi add%o2, %g1, %o0 -ENDPROC(NG4_retl_o2_plus_g1_plus_8) -ENTRY(NG4_retl_o2_plus_o4) +ENDPROC(memcpy_retl_o2_plus_g1_plus_8) +ENTRY(memcpy_retl_o2_plus_o4) ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4) -ENTRY(NG4_retl_o2_plus_o4_plus_8) +ENDPROC(memcpy_retl_o2_plus_o4) +ENTRY(memcpy_retl_o2_plus_o4_plus_8) add %o4, 8, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_8) -ENTRY(NG4_retl_o2_plus_o4_plus_16) +ENDPROC(memcpy_retl_o2_plus_o4_plus_8) +ENTRY(memcpy_retl_o2_plus_o4_plus_16) add %o4, 16, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_16) -ENTRY(NG4_retl_o2_plus_o4_plus_24) +ENDPROC(memcpy_retl_o2_plus_o4_plus_16) +ENTRY(memcpy_retl_o2_plus_o4_plus_24) add %o4, 24, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_24) -ENTRY(NG4_retl_o2_plus_o4_plus_32) +ENDPROC(memcpy_retl_o2_plus_o4_plus_24) +ENTRY(memcpy_retl_o2_plus_o4_plus_32) add %o4, 32, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_32) -ENTRY(NG4_retl_o2_plus_o4_plus_40) +ENDPROC(memcpy_retl_o2_plus_o4_plus_32) +ENTRY(memcpy_retl_o2_plus_o4_plus_40) add %o4, 40, %o4 ba,pt %xcc, 
__restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_40) -ENTRY(NG4_retl_o2_plus_o4_plus_48) +ENDPROC(memcpy_retl_o2_plus_o4_plus_40) +ENTRY(memcpy_retl_o2_plus_o4_plus_48) add %o4, 48, %o4 ba,pt %xcc, __restore_asi add%o2, %o4, %o0 -ENDPROC(NG4_retl_o2_plus_o4_plus_48) -ENTRY(NG4_retl_o2_plus_o4_plus_56) +E
[PATCH RFC 3/4] arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7
New algorithm that takes advantage of the M7 block init store ASI, ie, overlapping pipelines and miss buffer filling. Full details in code comments. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/kernel/head_64.S | 12 +- arch/sparc/lib/M7copy_from_user.S | 41 ++ arch/sparc/lib/M7copy_to_user.S | 51 ++ arch/sparc/lib/M7memcpy.S | 924 + arch/sparc/lib/M7memset.S | 354 ++ arch/sparc/lib/M7patch.S | 55 +++ arch/sparc/lib/Makefile |3 + 7 files changed, 1439 insertions(+), 1 deletions(-) create mode 100644 arch/sparc/lib/M7copy_from_user.S create mode 100644 arch/sparc/lib/M7copy_to_user.S create mode 100644 arch/sparc/lib/M7memcpy.S create mode 100644 arch/sparc/lib/M7memset.S create mode 100644 arch/sparc/lib/M7patch.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 41a4073..a7de798 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -600,7 +600,7 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 - be,pt %xcc, niagara4_patch + be,pt %xcc, sparc_m7_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch @@ -615,6 +615,16 @@ niagara_tlb_fixup: ba,a,pt %xcc, 80f nop +sparc_m7_patch: + callm7_patch_copyops +nop + callm7_patch_bzero +nop + callm7_patch_pageops +nop + + ba,a,pt %xcc, 80f +nop niagara4_patch: callniagara4_patch_copyops nop diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S new file mode 100644 index 000..d0689d7 --- /dev/null +++ b/arch/sparc/lib/M7copy_from_user.S @@ -0,0 +1,41 @@ +/* + * M7copy_from_user.S: SPARC M7 optimized copy from userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ */ + + +#define EX_LD(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_LD_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#define FUNC_NAME M7copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S new file mode 100644 index 000..d3be132 --- /dev/null +++ b/arch/sparc/lib/M7copy_to_user.S @@ -0,0 +1,51 @@ +/* + * M7copy_to_user.S: SPARC M7 optimized copy to userspace. + * + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + */ + + +#define EX_ST(x) \ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi; \ + .text; \ + .align 4; + +#define EX_ST_FP(x)\ +98:x; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, __restore_asi_fp;\ + .text; \ + .align 4; + + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME M7copy_to_user +#define STORE(type,src,addr) type##a src, [addr] %asi +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#defineSTORE_MRU_ASI ASI_ST_BLKINIT_MRU_S +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. +* Reading %asi to check for KERNEL_DS is comparatively +* cheap. +*/ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, raw_copy_in_user; \ + nop +#endif + +#include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S new file mode 100644 index 000..d4
[PATCH RFC 4/4] arch/sparc: Add accurate exception reporting in M7memcpy
Add accurate exception reporting in M7memcpy Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/M7copy_from_user.S | 11 +- arch/sparc/lib/M7copy_to_user.S | 10 +- arch/sparc/lib/M7memcpy.S | 396 ++-- arch/sparc/lib/Memcpy_utils.S | 182 + 4 files changed, 390 insertions(+), 209 deletions(-) diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S index d0689d7..66464b3 100644 --- a/arch/sparc/lib/M7copy_from_user.S +++ b/arch/sparc/lib/M7copy_from_user.S @@ -5,23 +5,22 @@ */ -#define EX_LD(x) \ +#define EX_LD(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x)\ +#define EX_LD_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; - #ifndef ASI_AIUS #define ASI_AIUS 0x11 #endif @@ -35,7 +34,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S index d3be132..a60ac46 100644 --- a/arch/sparc/lib/M7copy_to_user.S +++ b/arch/sparc/lib/M7copy_to_user.S @@ -5,19 +5,19 @@ */ -#define EX_ST(x) \ +#define EX_ST(x, y)\ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x)\ +#define EX_ST_FP(x, y) \ 98:x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __restore_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; @@ -45,7 +45,7 @@ rd %asi, %g1; \ cmp %g1, ASI_AIUS; \ bne,pn %icc, raw_copy_in_user; \ - nop +nop #endif #include "M7memcpy.S" diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S index d49f702..5cb3dae 100644 --- a/arch/sparc/lib/M7memcpy.S +++ b/arch/sparc/lib/M7memcpy.S @@ -96,17 +96,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x)x +#define EX_LD_FP(x,y) x 
#endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x)x +#define EX_ST_FP(x,y) x #endif #ifndef EX_RETVAL @@ -207,9 +207,9 @@ FUNC_NAME: sub %o1, %o0, %o1 ! %o1 gets the difference 7: ! dst aligning loop add %o1, %o0, %o4 - EX_LD(LOAD(ldub, %o4, %o4)) ! load one byte + EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte subcc %o5, 1, %o5 - EX_ST(STORE(stb, %o4, %o0)) + EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1) bgu,pt %ncc, 7b add%o0, 1, %o0 ! advance dst add %o1, %o0, %o1 ! restore %o1 @@ -234,64 +234,64 @@ FUNC_NAME: ble,pn %ncc, .Lmedl63 ! skip big loop if less than 64 bytes nop .Lmedl64: - EX_LD(LOAD(ldx, %o1, %o4)) ! load + EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63) ! load subcc %o2, 64, %o2! decrement length count - EX_ST(STORE(stx, %o4, %o0)) ! and store - EX_LD(LOAD(ldx, %o1+8, %o3))! a block of 64 bytes - EX_ST(STORE(stx, %o3, %o0+8)) - EX_LD(LOAD(ldx, %o1+16, %o4)) - EX_ST(STORE(stx, %o4, %o0+16)) - EX_LD(LOAD(ldx, %o1+24, %o3)) - EX_ST(STORE(stx, %o3, %o0+24)) - EX_LD(LOAD(ldx, %o1+32, %o4)) ! load - EX_ST(STORE(stx, %o4, %o0+32)) ! and store - EX_LD(LOAD(ldx, %o1+40, %o3)) ! a block of 64 bytes + EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64) ! and store + EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64 + EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56) + EX_LD(LOAD
[PATCH RFC 1/4] arch/sparc: Separate the exception handlers from NG4memcpy
Separate the exception handlers from NG4memcpy so that it can be used with new memcpy routines. Make a separate file for all these handlers. Signed-off-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/lib/Makefile |2 + arch/sparc/lib/Memcpy_utils.S | 163 + arch/sparc/lib/NG4memcpy.S| 149 - 3 files changed, 165 insertions(+), 149 deletions(-) create mode 100644 arch/sparc/lib/Memcpy_utils.S diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..37930c0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -36,6 +36,8 @@ lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o +lib-$(CONFIG_SPARC64) += Memcpy_utils.o + lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S new file mode 100644 index 000..f7a26e0 --- /dev/null +++ b/arch/sparc/lib/Memcpy_utils.S @@ -0,0 +1,163 @@ +#ifndef __ASM_MEMCPY_UTILS +#define __ASM_MEMCPY_UTILS + +#include +#include +#include + +ENTRY(__restore_asi_fp) + VISExitHalf + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi_fp) + +ENTRY(__restore_asi) + retl +wr %g0, ASI_AIUS, %asi +ENDPROC(__restore_asi) + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi +mov%o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi +add%o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi +add%o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi +add%o2, 
%o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi +add%o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi +add%o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi 
+add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp +add%o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
On 6/12/2017 3:51 PM, Arnd Bergmann wrote: On Mon, Jun 12, 2017 at 10:30 PM, Babu Moger <babu.mo...@oracle.com> wrote: Looks like microblaze can be configured for either little- or big-endian format. How about adding a choice statement to address this? Here is my proposed patch. Hi Babu, This part looks fine, but I think we also need this one: diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 740f2b82a182..1f6c486826a0 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -35,6 +35,8 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare +CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian +CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) That way, we don't have to guess what the toolchain does, but rather tell it to do whatever is configured, like we do for most other architectures. Ok. Thanks. Arnd. Will update and resend the series. Unfortunately we can't do the same thing on xtensa, as that no longer supports the -mbig-endian/-mlittle-endian flags in any recent gcc version (a long time ago it had them, but they were removed along with many other options). Arnd
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
On 6/12/2017 3:58 PM, Max Filippov wrote: On Mon, Jun 12, 2017 at 1:51 PM, Arnd Bergmann wrote: That way, we don't have to guess what the toolchain does, but rather tell it to do whatever is configured, like we do for most other architectures. Unfortunately we can't do the same thing on xtensa, as that no longer supports the -mbig-endian/-mlittle-endian flags in any recent gcc version (a long time ago it had them, but they were removed along with many other options). For xtensa we probably need to generate a Kconfig fragment that would go in with the variant subdirectory. That will solve this, and clean up other options that we currently have for manual selection for xtensa, but there's actually no choice, i.e. the option has to be selected correctly, there's only one correct choice and otherwise the kernel either won't build or won't work. I'll look into it. Max. Thanks. Please update us when you are done.
Re: [PATCH 2/2] include: warn for inconsistent endian config definition
Hi All, On 6/10/2017 9:06 AM, kbuild test robot wrote: Hi Babu, [auto build test WARNING on linus/master] [also build test WARNING on v4.12-rc4 next-20170609] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Babu-Moger/Define-CPU_BIG_ENDIAN-or-warn-for-inconsistencies/20170610-200424 config: microblaze-mmu_defconfig (attached as .config) compiler: microblaze-linux-gcc (GCC) 6.2.0 reproduce: wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=microblaze All warnings (new ones prefixed by >>): In file included from arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/asm-generic/bug.h:15, from arch/microblaze/include/asm/bug.h:1, from include/linux/bug.h:4, from include/linux/page-flags.h:9, from kernel/bounds.c:9: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ -- In file included from arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/asm-generic/bug.h:15, from arch/microblaze/include/asm/bug.h:1, from include/linux/bug.h:4, from include/linux/page-flags.h:9, from kernel/bounds.c:9: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ In file included from 
arch/microblaze/include/uapi/asm/byteorder.h:7:0, from include/asm-generic/bitops/le.h:5, from include/asm-generic/bitops.h:34, from arch/microblaze/include/asm/bitops.h:1, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/linux/list.h:8, from include/linux/rculist.h:9, from include/linux/pid.h:4, from include/linux/sched.h:13, from arch/microblaze/kernel/asm-offsets.c:13: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN ^~~ :1326:2: warning: #warning syscall statx not implemented [-Wcpp] vim +7 include/linux/byteorder/big_endian.h 1 #ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H 2 #define _LINUX_BYTEORDER_BIG_ENDIAN_H 3 4 #include 5 6 #ifndef CONFIG_CPU_BIG_ENDIAN > 7 #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN 8 #endif 9 10 #include 11 #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation Looks like microblaze can be configured to either little or big endian formats. How about adding a choice statement to address this. Here is my proposed patch. === diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 85885a5..74aa5de 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -35,6 +35,22 @@ config MICROBLAZE select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS +# Endianness selection +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + microblaze architectures can be configured for either little or + big endian formats. Be sure to select the appropriate mode. + +config CPU_BIG_ENDIAN + bool "Big endian" + +config CPU_LITTLE_ENDIAN + bool "Little endian" + +endchoice + config SWAP def_bool n
[PATCH v3 1/3] arch: Define CPU_BIG_ENDIAN for all fixed big endian archs
While working on enabling queued rwlock on SPARC, I found the following code in include/asm-generic/qrwlock.h which uses CONFIG_CPU_BIG_ENDIAN to clear a byte. static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) { return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); } The problem is that many of the fixed big endian architectures don't define CPU_BIG_ENDIAN, so the wrong byte is cleared. Define CPU_BIG_ENDIAN for all the fixed big endian architectures to fix it. Also found a few more references to this config parameter in drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Be aware that this may cause regressions if someone has worked around problems in the above code already. Remove the work-around. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Signed-off-by: Babu Moger <babu.mo...@oracle.com> Suggested-by: Arnd Bergmann <a...@arndb.de> Acked-by: Geert Uytterhoeven <ge...@linux-m68k.org> Acked-by: David S. Miller <da...@davemloft.net> Acked-by: Stafford Horne <sho...@gmail.com> --- arch/frv/Kconfig |3 +++ arch/h8300/Kconfig|3 +++ arch/m68k/Kconfig |3 +++ arch/openrisc/Kconfig |3 +++ arch/parisc/Kconfig |3 +++ arch/sparc/Kconfig|3 +++ 6 files changed, 18 insertions(+), 0 deletions(-) diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index eefd9a4..1cce824 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -17,6 +17,9 @@ config FRV select HAVE_DEBUG_STACKOVERFLOW select ARCH_NO_COHERENT_DMA_MMAP +config CPU_BIG_ENDIAN + def_bool y + config ZONE_DMA bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3ae8525..5380ac8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -23,6 +23,9 @@ config H8300 select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d140206..029a58b 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,6 +23,9 @@ 
config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1e95920..a0f2e4a 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -29,6 +29,9 @@ config OPENRISC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 531da9e..dda1f55 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -47,6 +47,9 @@ config PARISC and later HP3000 series). The PA-RISC Linux project home page is at <http://www.parisc-linux.org/>. +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 908f019..0d9dc49 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 -- 1.7.1